{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import sys\n",
    "sys.path.append('../code')\n",
    "\n",
    "from psumm import preprocess_section, Summarizer, STOP_TOKENS, Input, InfoF\n",
    "from nltk.stem import PorterStemmer\n",
    "from eval_metric import rouge_perl\n",
    "from functools import reduce\n",
    "import math\n",
    "import nltk\n",
    "from copy import deepcopy\n",
    "from summa.preprocessing.textcleaner import clean_text_by_sentences, tokenize\n",
    "\n",
    "import corpus\n",
    "from configparser import ConfigParser\n",
    "    \n",
    "config_file = \"../config.ini\"\n",
    "config = ConfigParser()\n",
    "config.optionxform=str\n",
    "config.read(config_file)\n",
    "\n",
    "corpora = list(getattr(corpus, name)(path) for name, path in config[\"Corpus\"].items())\n",
    "\n",
    "cnn, legal, sci,  acl, duc = corpora\n",
    "\n",
    "item = list(acl.items_generator())[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "introduction, section, conclusion = map(\n",
    "    lambda x: [_x.text for _x in clean_text_by_sentences(x)],\n",
    "    [item.introduction, item.section, item.conclusion]\n",
    ")\n",
    "\n",
    "_input = Input(introduction + section + conclusion,\n",
    "            [1]*len(introduction) + [0]*len(section) + [1]*len(conclusion))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sorted(InfoF().tf_f(_input), key=lambda x: x[1], reverse=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('the', 130),\n",
       " ('of', 103),\n",
       " ('in', 63),\n",
       " ('and', 59),\n",
       " ('for', 51),\n",
       " ('to', 45),\n",
       " ('is', 40),\n",
       " ('dictionaries', 39),\n",
       " ('language', 39),\n",
       " ('that', 34),\n",
       " ('languages', 30),\n",
       " ('documents', 29),\n",
       " ('multilingual', 28),\n",
       " ('we', 28),\n",
       " ('are', 26),\n",
       " ('document', 22),\n",
       " ('on', 21),\n",
       " ('wi', 20),\n",
       " ('as', 19),\n",
       " ('dictionary', 19),\n",
       " ('our', 19),\n",
       " ('results', 18),\n",
       " ('with', 18),\n",
       " ('The', 16),\n",
       " ('based', 15),\n",
       " ('each', 15),\n",
       " ('method', 15),\n",
       " ('dataset', 14),\n",
       " ('web', 14),\n",
       " ('be', 13),\n",
       " ('given', 13),\n",
       " ('queries', 13),\n",
       " ('query', 13),\n",
       " ('this', 13),\n",
       " ('which', 13),\n",
       " ('by', 12),\n",
       " ('not', 12),\n",
       " ('synthetic', 11),\n",
       " ('word', 11),\n",
       " ('words', 11),\n",
       " ('bilingual', 10),\n",
       " ('en', 10),\n",
       " ('English', 9),\n",
       " ('Wikipedia', 9),\n",
       " ('Dl', 8),\n",
       " ('collection', 8),\n",
       " ('construction', 8),\n",
       " ('have', 8),\n",
       " ('over', 8),\n",
       " ('set', 8),\n",
       " ('Ll', 7),\n",
       " ('We', 7),\n",
       " ('all', 7),\n",
       " ('an', 7),\n",
       " ('contain', 7),\n",
       " ('it', 7),\n",
       " ('or', 7),\n",
       " ('returned', 7),\n",
       " ('than', 7),\n",
       " ('were', 7),\n",
       " ('Apriori', 6),\n",
       " ('ClueWeb09', 6),\n",
       " ('For', 6),\n",
       " ('Table', 6),\n",
       " ('containing', 6),\n",
       " ('research', 6),\n",
       " ('such', 6),\n",
       " ('terms', 6),\n",
       " ('training', 6),\n",
       " ('using', 6),\n",
       " ('In', 5),\n",
       " ('MAP', 5),\n",
       " ('being', 5),\n",
       " ('co', 5),\n",
       " ('first', 5),\n",
       " ('found', 5),\n",
       " ('from', 5),\n",
       " ('lexical', 5),\n",
       " ('more', 5),\n",
       " ('no', 5),\n",
       " ('non', 5),\n",
       " ('number', 5),\n",
       " ('only', 5),\n",
       " ('open', 5),\n",
       " ('other', 5),\n",
       " ('potentially', 5),\n",
       " ('result', 5),\n",
       " ('retrieval', 5),\n",
       " ('search', 5),\n",
       " ('target', 5),\n",
       " ('term', 5),\n",
       " ('there', 5),\n",
       " ('these', 5),\n",
       " ('work', 5),\n",
       " ('As', 4),\n",
       " ('Google', 4),\n",
       " ('This', 4),\n",
       " ('algorithm', 4),\n",
       " ('approach', 4),\n",
       " ('at', 4),\n",
       " ('been', 4),\n",
       " ('classification', 4),\n",
       " ('combinations', 4),\n",
       " ('content', 4),\n",
       " ('could', 4),\n",
       " ('genre', 4),\n",
       " ('identify', 4),\n",
       " ('if', 4),\n",
       " ('likelihood', 4),\n",
       " ('methods', 4),\n",
       " ('occurrence', 4),\n",
       " ('pre', 4),\n",
       " ('proposed', 4),\n",
       " ('relevant', 4),\n",
       " ('scores', 4),\n",
       " ('support', 4),\n",
       " ('their', 4),\n",
       " ('where', 4),\n",
       " ('wj', 4),\n",
       " ('770', 3),\n",
       " ('Japanese', 3),\n",
       " ('ability', 3),\n",
       " ('against', 3),\n",
       " ('analysis', 3),\n",
       " ('any', 3),\n",
       " ('articles', 3),\n",
       " ('assumption', 3),\n",
       " ('automatically', 3),\n",
       " ('average', 3),\n",
       " ('baseline', 3),\n",
       " ('both', 3),\n",
       " ('but', 3),\n",
       " ('can', 3),\n",
       " ('certain', 3),\n",
       " ('combination', 3),\n",
       " ('comparable', 3),\n",
       " ('construct', 3),\n",
       " ('conventional', 3),\n",
       " ('criterion', 3),\n",
       " ('de', 3),\n",
       " ('different', 3),\n",
       " ('domain', 3),\n",
       " ('evaluation', 3),\n",
       " ('experiments', 3),\n",
       " ('few', 3),\n",
       " ('formulation', 3),\n",
       " ('fr', 3),\n",
       " ('higher', 3),\n",
       " ('highly', 3),\n",
       " ('including', 3),\n",
       " ('into', 3),\n",
       " ('intuition', 3),\n",
       " ('item', 3),\n",
       " ('its', 3),\n",
       " ('learned', 3),\n",
       " ('least', 3),\n",
       " ('lexicon', 3),\n",
       " ('low', 3),\n",
       " ('occur', 3),\n",
       " ('often', 3),\n",
       " ('one', 3),\n",
       " ('original', 3),\n",
       " ('pairs', 3),\n",
       " ('parallel', 3),\n",
       " ('performed', 3),\n",
       " ('precision', 3),\n",
       " ('present', 3),\n",
       " ('querying', 3),\n",
       " ('random', 3),\n",
       " ('sdict', 3),\n",
       " ('second', 3),\n",
       " ('size', 3),\n",
       " ('standard', 3),\n",
       " ('those', 3),\n",
       " ('true', 3),\n",
       " ('two', 3),\n",
       " ('used', 3),\n",
       " ('was', 3),\n",
       " ('whether', 3),\n",
       " ('10', 2),\n",
       " ('100', 2),\n",
       " ('1000', 2),\n",
       " ('13', 2),\n",
       " ('2009', 2),\n",
       " ('32', 2),\n",
       " ('49', 2),\n",
       " ('50', 2),\n",
       " ('52', 2),\n",
       " ('62', 2),\n",
       " ('63', 2),\n",
       " ('920', 2),\n",
       " ('API', 2),\n",
       " ('Arabic', 2),\n",
       " ('Below', 2),\n",
       " ('Chinese', 2),\n",
       " ('Dictionary', 2),\n",
       " ('Dicts', 2),\n",
       " ('Figure', 2),\n",
       " ('Freedict', 2),\n",
       " ('German', 2),\n",
       " ('Italian', 2),\n",
       " ('Korean', 2),\n",
       " ('Portuguese', 2),\n",
       " ('Score', 2),\n",
       " ('Spanish', 2),\n",
       " ('To', 2),\n",
       " ('While', 2),\n",
       " ('access', 2),\n",
       " ('also', 2),\n",
       " ('although', 2),\n",
       " ('ar', 2),\n",
       " ('assume', 2),\n",
       " ('automatic', 2),\n",
       " ('background', 2),\n",
       " ('best', 2),\n",
       " ('cod', 2),\n",
       " ('component', 2),\n",
       " ('contained', 2),\n",
       " ('contains', 2),\n",
       " ('corpora', 2),\n",
       " ('corpus', 2),\n",
       " ('coverage', 2),\n",
       " ('crosslingual', 2),\n",
       " ('data', 2),\n",
       " ('detail', 2),\n",
       " ('detect', 2),\n",
       " ('detection', 2),\n",
       " ('determination', 2),\n",
       " ('developers', 2),\n",
       " ('direct', 2),\n",
       " ('discriminating', 2),\n",
       " ('effectiveness', 2),\n",
       " ('efficiently', 2),\n",
       " ('empty', 2),\n",
       " ('engine', 2),\n",
       " ('es', 2),\n",
       " ('evaluate', 2),\n",
       " ('exhaustively', 2),\n",
       " ('existing', 2),\n",
       " ('far', 2),\n",
       " ('features', 2),\n",
       " ('follows', 2),\n",
       " ('function', 2),\n",
       " ('generally', 2),\n",
       " ('greater', 2),\n",
       " ('has', 2),\n",
       " ('included', 2),\n",
       " ('index', 2),\n",
       " ('indexed', 2),\n",
       " ('indicated', 2),\n",
       " ('injected', 2),\n",
       " ('interest', 2),\n",
       " ('iteration', 2),\n",
       " ('largely', 2),\n",
       " ('likely', 2),\n",
       " ('lower', 2),\n",
       " ('may', 2),\n",
       " ('mean', 2),\n",
       " ('methodology', 2),\n",
       " ('mining', 2),\n",
       " ('mixture', 2),\n",
       " ('modified', 2),\n",
       " ('monolingual', 2),\n",
       " ('need', 2),\n",
       " ('next', 2),\n",
       " ('others', 2),\n",
       " ('patterns', 2),\n",
       " ('prefer', 2),\n",
       " ('proportions', 2),\n",
       " ('ranked', 2),\n",
       " ('rather', 2),\n",
       " ('records', 2),\n",
       " ('relatively', 2),\n",
       " ('resources', 2),\n",
       " ('return', 2),\n",
       " ('score', 2),\n",
       " ('selection', 2),\n",
       " ('sets', 2),\n",
       " ('should', 2),\n",
       " ('simply', 2),\n",
       " ('single', 2),\n",
       " ('some', 2),\n",
       " ('specific', 2),\n",
       " ('step', 2),\n",
       " ('structural', 2),\n",
       " ('style', 2),\n",
       " ('substantially', 2),\n",
       " ('targeted', 2),\n",
       " ('technical', 2),\n",
       " ('tend', 2),\n",
       " ('then', 2),\n",
       " ('thesauri', 2),\n",
       " ('they', 2),\n",
       " ('third', 2),\n",
       " ('total', 2),\n",
       " ('train', 2),\n",
       " ('type', 2),\n",
       " ('underestimate', 2),\n",
       " ('unique', 2),\n",
       " ('use', 2),\n",
       " ('variety', 2),\n",
       " ('very', 2),\n",
       " ('via', 2),\n",
       " ('weight', 2),\n",
       " ('well', 2),\n",
       " ('what', 2),\n",
       " ('00', 1),\n",
       " ('000', 1),\n",
       " ('00Average', 1),\n",
       " ('00de0', 1),\n",
       " ('00es0', 1),\n",
       " ('00zh0', 1),\n",
       " ('01ar0', 1),\n",
       " ('02', 1),\n",
       " ('03it0', 1),\n",
       " ('04Table', 1),\n",
       " ('04de0', 1),\n",
       " ('04ja0', 1),\n",
       " ('05', 1),\n",
       " ('08fr0', 1),\n",
       " ('08it0', 1),\n",
       " ('09fr0', 1),\n",
       " ('0ar0', 1),\n",
       " ('0it0', 1),\n",
       " ('11ar0', 1),\n",
       " ('12Table', 1),\n",
       " ('13ja0', 1),\n",
       " ('17', 1),\n",
       " ('17Average', 1),\n",
       " ('19es0', 1),\n",
       " ('20', 1),\n",
       " ('21', 1),\n",
       " ('246', 1),\n",
       " ('2Table', 1),\n",
       " ('2zh0', 1),\n",
       " ('320', 1),\n",
       " ('33', 1),\n",
       " ('390', 1),\n",
       " ('3de1', 1),\n",
       " ('41', 1),\n",
       " ('47', 1),\n",
       " ('48', 1),\n",
       " ('550', 1),\n",
       " ('61', 1),\n",
       " ('630', 1),\n",
       " ('690', 1),\n",
       " ('6es0', 1),\n",
       " ('74', 1),\n",
       " ('750', 1),\n",
       " ('79', 1),\n",
       " ('7fr0', 1),\n",
       " ('840', 1),\n",
       " ('880', 1),\n",
       " ('89', 1),\n",
       " ('92', 1),\n",
       " ('93', 1),\n",
       " ('940', 1),\n",
       " ('950', 1),\n",
       " ('970', 1),\n",
       " ('980', 1),\n",
       " ('9ja0', 1),\n",
       " ('Acknowledgements', 1),\n",
       " ('Also', 1),\n",
       " ('Alternatively', 1),\n",
       " ('Among', 1),\n",
       " ('Australian', 1),\n",
       " ('Based', 1),\n",
       " ('Boolean', 1),\n",
       " ('Council', 1),\n",
       " ('Despite', 1),\n",
       " ('Details', 1),\n",
       " ('DictionariesQueries', 1),\n",
       " ('Each', 1),\n",
       " ('Eight', 1),\n",
       " ('Encouragingly', 1),\n",
       " ('Examples', 1),\n",
       " ('Factors', 1),\n",
       " ('February', 1),\n",
       " ('Finally', 1),\n",
       " ('First', 1),\n",
       " ('French', 1),\n",
       " ('Given', 1),\n",
       " ('Group', 1),\n",
       " ('Having', 1),\n",
       " ('Here', 1),\n",
       " ('Indri', 1),\n",
       " ('January', 1),\n",
       " ('Language', 1),\n",
       " ('Lexical', 1),\n",
       " ('Looking', 1),\n",
       " ('MeCab', 1),\n",
       " ('Methods', 1),\n",
       " ('Morphological', 1),\n",
       " ('Most', 1),\n",
       " ('Nl', 1),\n",
       " ('Note', 1),\n",
       " ('Our', 1),\n",
       " ('Panlex', 1),\n",
       " ('Parallel', 1),\n",
       " ('Pwi', 1),\n",
       " ('P𝑝𝑎𝑝𝑒𝑟', 1),\n",
       " ('P𝑝𝑎𝑝𝑦𝑟𝑢𝑠', 1),\n",
       " ('Recall', 1),\n",
       " ('Related', 1),\n",
       " ('Research', 1),\n",
       " ('See', 1),\n",
       " ('Segmenter', 1),\n",
       " ('Stanford', 1),\n",
       " ('Such', 1),\n",
       " ('Swaheli', 1),\n",
       " ('Synthetic', 1),\n",
       " ('That', 1),\n",
       " ('WikipediaBased', 1),\n",
       " ('Word', 1),\n",
       " ('able', 1),\n",
       " ('absence', 1),\n",
       " ('accordance', 1),\n",
       " ('achieved', 1),\n",
       " ('achieving', 1),\n",
       " ('actual', 1),\n",
       " ('adapted', 1),\n",
       " ('adding', 1),\n",
       " ('affected', 1),\n",
       " ('almost', 1),\n",
       " ('anchor', 1),\n",
       " ('anonymous', 1),\n",
       " ('anti', 1),\n",
       " ('applied', 1),\n",
       " ('apply', 1),\n",
       " ('arbitrarily', 1),\n",
       " ('around', 1),\n",
       " ('assistance', 1),\n",
       " ('attained', 1),\n",
       " ('aware', 1),\n",
       " ('basic', 1),\n",
       " ('because', 1),\n",
       " ('below', 1),\n",
       " ('between', 1),\n",
       " ('bias', 1),\n",
       " ('billion', 1),\n",
       " ('broadly', 1),\n",
       " ('calculate', 1),\n",
       " ('calculated', 1),\n",
       " ('calculation', 1),\n",
       " ('carried', 1),\n",
       " ('categories', 1),\n",
       " ('categorisation', 1),\n",
       " ('certainly', 1),\n",
       " ('class', 1),\n",
       " ('classifies', 1),\n",
       " ('collected', 1),\n",
       " ('collections', 1),\n",
       " ('collocate', 1),\n",
       " ('combined', 1),\n",
       " ('combining', 1),\n",
       " ('comments', 1),\n",
       " ('commonly', 1),\n",
       " ('community', 1),\n",
       " ('comparably', 1),\n",
       " ('compiled', 1),\n",
       " ('complement', 1),\n",
       " ('considerably', 1),\n",
       " ('consists', 1),\n",
       " ('constructed', 1),\n",
       " ('cooccur', 1),\n",
       " ('count', 1),\n",
       " ('course', 1),\n",
       " ('creates', 1),\n",
       " ('credible', 1),\n",
       " ('cruiser', 1),\n",
       " ('cscore', 1),\n",
       " ('cutoff', 1),\n",
       " ('decreasing', 1),\n",
       " ('density', 1),\n",
       " ('describe', 1),\n",
       " ('described', 1),\n",
       " ('description', 1),\n",
       " ('design', 1),\n",
       " ('designed', 1),\n",
       " ('detailed', 1),\n",
       " ('details', 1),\n",
       " ('detecting', 1),\n",
       " ('develop', 1),\n",
       " ('developed', 1),\n",
       " ('developer', 1),\n",
       " ('differ', 1),\n",
       " ('discovery', 1),\n",
       " ('discriminatory', 1),\n",
       " ('distributional', 1),\n",
       " ('do', 1),\n",
       " ('documentsNote', 1),\n",
       " ('downsampled', 1),\n",
       " ('downside', 1),\n",
       " ('due', 1),\n",
       " ('dumps', 1),\n",
       " ('easily', 1),\n",
       " ('effective', 1),\n",
       " ('elements', 1),\n",
       " ('end', 1),\n",
       " ('especially', 1),\n",
       " ('estimated', 1),\n",
       " ('evaluated', 1),\n",
       " ('evaluates', 1),\n",
       " ('evaluating', 1),\n",
       " ('exist', 1),\n",
       " ('experiment', 1),\n",
       " ('experimental', 1),\n",
       " ('explore', 1),\n",
       " ('extract', 1),\n",
       " ('falls', 1),\n",
       " ('filter', 1),\n",
       " ('filtering', 1),\n",
       " ('final', 1),\n",
       " ('follow', 1),\n",
       " ('form', 1),\n",
       " ('formed', 1),\n",
       " ('forms', 1),\n",
       " ('four', 1),\n",
       " ('franca', 1),\n",
       " ('frequent', 1),\n",
       " ('funding', 1),\n",
       " ('future', 1),\n",
       " ('general', 1),\n",
       " ('generate', 1),\n",
       " ('generation', 1),\n",
       " ('genuine', 1),\n",
       " ('glossed', 1),\n",
       " ('glossing', 1),\n",
       " ('gospel', 1),\n",
       " ('guarantee', 1),\n",
       " ('guaranteed', 1),\n",
       " ('had', 1),\n",
       " ('hand', 1),\n",
       " ('high', 1),\n",
       " ('highest', 1),\n",
       " ('hope', 1),\n",
       " ('iS', 1),\n",
       " ('identification', 1),\n",
       " ('identifying', 1),\n",
       " ('iff', 1),\n",
       " ('immediately', 1),\n",
       " ('impact', 1),\n",
       " ('include', 1),\n",
       " ('incorporating', 1),\n",
       " ('increasing', 1),\n",
       " ('indeed', 1),\n",
       " ('indeterminate', 1),\n",
       " ('individual', 1),\n",
       " ('inflected', 1),\n",
       " ('information', 1),\n",
       " ('initial', 1),\n",
       " ('instance', 1),\n",
       " ('inverted', 1),\n",
       " ('involving', 1),\n",
       " ('issue', 1),\n",
       " ('itemset', 1),\n",
       " ('ja', 1),\n",
       " ('judged', 1),\n",
       " ('knowledge', 1),\n",
       " ('ko', 1),\n",
       " ('lNl', 1),\n",
       " ('label', 1),\n",
       " ('learnedAvg', 1),\n",
       " ('lemmas', 1),\n",
       " ('length', 1),\n",
       " ('lengthen3', 1),\n",
       " ('lexemes', 1),\n",
       " ('lexicographer', 1),\n",
       " ('lexicons', 1),\n",
       " ('line', 1),\n",
       " ('lines', 1),\n",
       " ('lingua', 1),\n",
       " ('link', 1),\n",
       " ('list', 1),\n",
       " ('lists', 1),\n",
       " ('little', 1),\n",
       " ('local', 1),\n",
       " ('locate', 1),\n",
       " ('log', 1),\n",
       " ('longest', 1),\n",
       " ('manual', 1),\n",
       " ('mature', 1),\n",
       " ('maxlPl', 1),\n",
       " ('million', 1),\n",
       " ('minus', 1),\n",
       " ('mix', 1),\n",
       " ('mono', 1),\n",
       " ('most', 1),\n",
       " ('multi', 1),\n",
       " ('naively', 1),\n",
       " ('name', 1),\n",
       " ('namely', 1),\n",
       " ('needs', 1),\n",
       " ('new', 1),\n",
       " ('noodle', 1),\n",
       " ('noting', 1),\n",
       " ('nouns', 1),\n",
       " ('novel', 1),\n",
       " ('obtained', 1),\n",
       " ('obvious', 1),\n",
       " ('occurs', 1),\n",
       " ('optimisation', 1),\n",
       " ('order', 1),\n",
       " ('org', 1),\n",
       " ('otherwise', 1),\n",
       " ('out', 1),\n",
       " ('outside', 1),\n",
       " ('pages', 1),\n",
       " ('pair', 1),\n",
       " ('paired', 1),\n",
       " ('pairing', 1),\n",
       " ('panlex', 1),\n",
       " ('paper', 1),\n",
       " ('particular', 1),\n",
       " ('perform', 1),\n",
       " ('possible', 1),\n",
       " ('predict', 1),\n",
       " ('predictable', 1),\n",
       " ('predominantly', 1),\n",
       " ('preferred', 1),\n",
       " ('presented', 1),\n",
       " ('prevalence', 1),\n",
       " ('previous', 1),\n",
       " ('prior', 1),\n",
       " ('priori', 1),\n",
       " ('proceeded', 1),\n",
       " ('proportion', 1),\n",
       " ('pruned', 1),\n",
       " ('pt', 1),\n",
       " ('question', 1),\n",
       " ('randomly', 1),\n",
       " ('range', 1),\n",
       " ('ranking', 1),\n",
       " ('realistically', 1),\n",
       " ('recall', 1),\n",
       " ('regular', 1),\n",
       " ('reject', 1),\n",
       " ('relative', 1),\n",
       " ('removed', 1),\n",
       " ('respectively', 1),\n",
       " ('restricting', 1),\n",
       " ('resultant', 1),\n",
       " ('retrieve', 1),\n",
       " ('retrieved', 1),\n",
       " ('returns', 1),\n",
       " ('reviewers', 1),\n",
       " ('said', 1),\n",
       " ('same', 1),\n",
       " ('satisfying', 1),\n",
       " ('scope', 1),\n",
       " ('sdictwi', 1),\n",
       " ('seeks', 1),\n",
       " ('segmentation', 1),\n",
       " ('select', 1),\n",
       " ('selected', 1),\n",
       " ('selective', 1),\n",
       " ('similarity', 1),\n",
       " ('simple', 1),\n",
       " ('site', 1),\n",
       " ('small', 1),\n",
       " ('smallerLangDictsMAPBaselineen0', 1),\n",
       " ('smallerLangDictsMAPBaselinezh0', 1),\n",
       " ('smallerLangWikipedia', 1),\n",
       " ('smallerLanguageProportionen', 1),\n",
       " ('so', 1),\n",
       " ('sophistication', 1),\n",
       " ('source', 1),\n",
       " ('sourced', 1),\n",
       " ('specialised', 1),\n",
       " ('specifically', 1),\n",
       " ('strong', 1),\n",
       " ('subset', 1),\n",
       " ('successful', 1),\n",
       " ('suggest', 1),\n",
       " ('suited', 1),\n",
       " ('supported', 1),\n",
       " ('sushi', 1),\n",
       " ('suspect', 1),\n",
       " ('system', 1),\n",
       " ('systematicity', 1),\n",
       " ('task', 1),\n",
       " ('ten', 1),\n",
       " ('terminology', 1),\n",
       " ('text', 1),\n",
       " ('thank', 1),\n",
       " ('them', 1),\n",
       " ('theoretically', 1),\n",
       " ('three', 1),\n",
       " ('threshold', 1),\n",
       " ('thus', 1),\n",
       " ('together', 1),\n",
       " ('towards', 1),\n",
       " ('transliterated', 1),\n",
       " ('typicality', 1),\n",
       " ('underlying', 1),\n",
       " ('until', 1),\n",
       " ('usable', 1),\n",
       " ('valuable', 1),\n",
       " ('value', 1),\n",
       " ('values', 1),\n",
       " ('varies', 1),\n",
       " ('verbs', 1),\n",
       " ('vocabulary', 1),\n",
       " ('want', 1),\n",
       " ('way', 1),\n",
       " ('ways', 1),\n",
       " ('weighing', 1),\n",
       " ('whereas', 1),\n",
       " ('whereby', 1),\n",
       " ('while', 1),\n",
       " ('wide', 1),\n",
       " ('wiotherwisewhere', 1),\n",
       " ('wish', 1),\n",
       " ('within', 1),\n",
       " ('wn', 1),\n",
       " ('would', 1),\n",
       " ('zero', 1),\n",
       " ('zh', 1)]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sorted(InfoF().tf_f(_input), key=lambda x: x[1], reverse=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('00', 3.8918202981106265),\n",
       " ('000', 3.8918202981106265),\n",
       " ('00Average', 3.8918202981106265),\n",
       " ('00de0', 3.8918202981106265),\n",
       " ('00es0', 3.8918202981106265),\n",
       " ('00zh0', 3.8918202981106265),\n",
       " ('01ar0', 3.8918202981106265),\n",
       " ('02', 3.8918202981106265),\n",
       " ('03it0', 3.8918202981106265),\n",
       " ('04Table', 3.8918202981106265),\n",
       " ('04de0', 3.8918202981106265),\n",
       " ('04ja0', 3.8918202981106265),\n",
       " ('05', 3.8918202981106265),\n",
       " ('08fr0', 3.8918202981106265),\n",
       " ('08it0', 3.8918202981106265),\n",
       " ('09fr0', 3.8918202981106265),\n",
       " ('0ar0', 3.8918202981106265),\n",
       " ('0it0', 3.8918202981106265),\n",
       " ('10', 3.8918202981106265),\n",
       " ('100', 3.8918202981106265),\n",
       " ('11ar0', 3.8918202981106265),\n",
       " ('12Table', 3.8918202981106265),\n",
       " ('13', 3.8918202981106265),\n",
       " ('13ja0', 3.8918202981106265),\n",
       " ('17', 3.8918202981106265),\n",
       " ('17Average', 3.8918202981106265),\n",
       " ('19es0', 3.8918202981106265),\n",
       " ('20', 3.8918202981106265),\n",
       " ('21', 3.8918202981106265),\n",
       " ('246', 3.8918202981106265),\n",
       " ('2Table', 3.8918202981106265),\n",
       " ('2zh0', 3.8918202981106265),\n",
       " ('320', 3.8918202981106265),\n",
       " ('33', 3.8918202981106265),\n",
       " ('390', 3.8918202981106265),\n",
       " ('3de1', 3.8918202981106265),\n",
       " ('41', 3.8918202981106265),\n",
       " ('47', 3.8918202981106265),\n",
       " ('48', 3.8918202981106265),\n",
       " ('550', 3.8918202981106265),\n",
       " ('61', 3.8918202981106265),\n",
       " ('62', 3.8918202981106265),\n",
       " ('63', 3.8918202981106265),\n",
       " ('630', 3.8918202981106265),\n",
       " ('690', 3.8918202981106265),\n",
       " ('6es0', 3.8918202981106265),\n",
       " ('74', 3.8918202981106265),\n",
       " ('750', 3.8918202981106265),\n",
       " ('79', 3.8918202981106265),\n",
       " ('7fr0', 3.8918202981106265),\n",
       " ('840', 3.8918202981106265),\n",
       " ('880', 3.8918202981106265),\n",
       " ('89', 3.8918202981106265),\n",
       " ('92', 3.8918202981106265),\n",
       " ('93', 3.8918202981106265),\n",
       " ('940', 3.8918202981106265),\n",
       " ('950', 3.8918202981106265),\n",
       " ('970', 3.8918202981106265),\n",
       " ('980', 3.8918202981106265),\n",
       " ('9ja0', 3.8918202981106265),\n",
       " ('Acknowledgements', 3.8918202981106265),\n",
       " ('Also', 3.8918202981106265),\n",
       " ('Alternatively', 3.8918202981106265),\n",
       " ('Among', 3.8918202981106265),\n",
       " ('Australian', 3.8918202981106265),\n",
       " ('Based', 3.8918202981106265),\n",
       " ('Boolean', 3.8918202981106265),\n",
       " ('Council', 3.8918202981106265),\n",
       " ('Despite', 3.8918202981106265),\n",
       " ('Details', 3.8918202981106265),\n",
       " ('DictionariesQueries', 3.8918202981106265),\n",
       " ('Each', 3.8918202981106265),\n",
       " ('Eight', 3.8918202981106265),\n",
       " ('Encouragingly', 3.8918202981106265),\n",
       " ('Examples', 3.8918202981106265),\n",
       " ('Factors', 3.8918202981106265),\n",
       " ('February', 3.8918202981106265),\n",
       " ('Figure', 3.8918202981106265),\n",
       " ('Finally', 3.8918202981106265),\n",
       " ('First', 3.8918202981106265),\n",
       " ('French', 3.8918202981106265),\n",
       " ('Given', 3.8918202981106265),\n",
       " ('Group', 3.8918202981106265),\n",
       " ('Having', 3.8918202981106265),\n",
       " ('Here', 3.8918202981106265),\n",
       " ('Indri', 3.8918202981106265),\n",
       " ('January', 3.8918202981106265),\n",
       " ('Korean', 3.8918202981106265),\n",
       " ('Language', 3.8918202981106265),\n",
       " ('Lexical', 3.8918202981106265),\n",
       " ('Looking', 3.8918202981106265),\n",
       " ('MeCab', 3.8918202981106265),\n",
       " ('Methods', 3.8918202981106265),\n",
       " ('Morphological', 3.8918202981106265),\n",
       " ('Most', 3.8918202981106265),\n",
       " ('Nl', 3.8918202981106265),\n",
       " ('Note', 3.8918202981106265),\n",
       " ('Our', 3.8918202981106265),\n",
       " ('Panlex', 3.8918202981106265),\n",
       " ('Parallel', 3.8918202981106265),\n",
       " ('Portuguese', 3.8918202981106265),\n",
       " ('Pwi', 3.8918202981106265),\n",
       " ('P𝑝𝑎𝑝𝑒𝑟', 3.8918202981106265),\n",
       " ('P𝑝𝑎𝑝𝑦𝑟𝑢𝑠', 3.8918202981106265),\n",
       " ('Recall', 3.8918202981106265),\n",
       " ('Related', 3.8918202981106265),\n",
       " ('Research', 3.8918202981106265),\n",
       " ('See', 3.8918202981106265),\n",
       " ('Segmenter', 3.8918202981106265),\n",
       " ('Stanford', 3.8918202981106265),\n",
       " ('Such', 3.8918202981106265),\n",
       " ('Swaheli', 3.8918202981106265),\n",
       " ('Synthetic', 3.8918202981106265),\n",
       " ('That', 3.8918202981106265),\n",
       " ('WikipediaBased', 3.8918202981106265),\n",
       " ('Word', 3.8918202981106265),\n",
       " ('able', 3.8918202981106265),\n",
       " ('absence', 3.8918202981106265),\n",
       " ('accordance', 3.8918202981106265),\n",
       " ('achieved', 3.8918202981106265),\n",
       " ('achieving', 3.8918202981106265),\n",
       " ('actual', 3.8918202981106265),\n",
       " ('adapted', 3.8918202981106265),\n",
       " ('adding', 3.8918202981106265),\n",
       " ('affected', 3.8918202981106265),\n",
       " ('almost', 3.8918202981106265),\n",
       " ('anchor', 3.8918202981106265),\n",
       " ('anonymous', 3.8918202981106265),\n",
       " ('anti', 3.8918202981106265),\n",
       " ('applied', 3.8918202981106265),\n",
       " ('apply', 3.8918202981106265),\n",
       " ('arbitrarily', 3.8918202981106265),\n",
       " ('around', 3.8918202981106265),\n",
       " ('assistance', 3.8918202981106265),\n",
       " ('attained', 3.8918202981106265),\n",
       " ('automatic', 3.8918202981106265),\n",
       " ('aware', 3.8918202981106265),\n",
       " ('basic', 3.8918202981106265),\n",
       " ('because', 3.8918202981106265),\n",
       " ('below', 3.8918202981106265),\n",
       " ('between', 3.8918202981106265),\n",
       " ('bias', 3.8918202981106265),\n",
       " ('billion', 3.8918202981106265),\n",
       " ('broadly', 3.8918202981106265),\n",
       " ('calculate', 3.8918202981106265),\n",
       " ('calculated', 3.8918202981106265),\n",
       " ('calculation', 3.8918202981106265),\n",
       " ('carried', 3.8918202981106265),\n",
       " ('categories', 3.8918202981106265),\n",
       " ('categorisation', 3.8918202981106265),\n",
       " ('certainly', 3.8918202981106265),\n",
       " ('class', 3.8918202981106265),\n",
       " ('classifies', 3.8918202981106265),\n",
       " ('cod', 3.8918202981106265),\n",
       " ('collected', 3.8918202981106265),\n",
       " ('collections', 3.8918202981106265),\n",
       " ('collocate', 3.8918202981106265),\n",
       " ('combined', 3.8918202981106265),\n",
       " ('combining', 3.8918202981106265),\n",
       " ('comments', 3.8918202981106265),\n",
       " ('commonly', 3.8918202981106265),\n",
       " ('community', 3.8918202981106265),\n",
       " ('comparably', 3.8918202981106265),\n",
       " ('compiled', 3.8918202981106265),\n",
       " ('complement', 3.8918202981106265),\n",
       " ('considerably', 3.8918202981106265),\n",
       " ('consists', 3.8918202981106265),\n",
       " ('constructed', 3.8918202981106265),\n",
       " ('cooccur', 3.8918202981106265),\n",
       " ('count', 3.8918202981106265),\n",
       " ('course', 3.8918202981106265),\n",
       " ('creates', 3.8918202981106265),\n",
       " ('credible', 3.8918202981106265),\n",
       " ('cruiser', 3.8918202981106265),\n",
       " ('cscore', 3.8918202981106265),\n",
       " ('cutoff', 3.8918202981106265),\n",
       " ('decreasing', 3.8918202981106265),\n",
       " ('density', 3.8918202981106265),\n",
       " ('describe', 3.8918202981106265),\n",
       " ('described', 3.8918202981106265),\n",
       " ('description', 3.8918202981106265),\n",
       " ('design', 3.8918202981106265),\n",
       " ('designed', 3.8918202981106265),\n",
       " ('detailed', 3.8918202981106265),\n",
       " ('details', 3.8918202981106265),\n",
       " ('detect', 3.8918202981106265),\n",
       " ('detecting', 3.8918202981106265),\n",
       " ('develop', 3.8918202981106265),\n",
       " ('developed', 3.8918202981106265),\n",
       " ('developer', 3.8918202981106265),\n",
       " ('differ', 3.8918202981106265),\n",
       " ('discovery', 3.8918202981106265),\n",
       " ('discriminatory', 3.8918202981106265),\n",
       " ('distributional', 3.8918202981106265),\n",
       " ('do', 3.8918202981106265),\n",
       " ('documentsNote', 3.8918202981106265),\n",
       " ('downsampled', 3.8918202981106265),\n",
       " ('downside', 3.8918202981106265),\n",
       " ('due', 3.8918202981106265),\n",
       " ('dumps', 3.8918202981106265),\n",
       " ('easily', 3.8918202981106265),\n",
       " ('effective', 3.8918202981106265),\n",
       " ('efficiently', 3.8918202981106265),\n",
       " ('elements', 3.8918202981106265),\n",
       " ('end', 3.8918202981106265),\n",
       " ('especially', 3.8918202981106265),\n",
       " ('estimated', 3.8918202981106265),\n",
       " ('evaluated', 3.8918202981106265),\n",
       " ('evaluates', 3.8918202981106265),\n",
       " ('evaluating', 3.8918202981106265),\n",
       " ('exist', 3.8918202981106265),\n",
       " ('experiment', 3.8918202981106265),\n",
       " ('experimental', 3.8918202981106265),\n",
       " ('explore', 3.8918202981106265),\n",
       " ('extract', 3.8918202981106265),\n",
       " ('falls', 3.8918202981106265),\n",
       " ('filter', 3.8918202981106265),\n",
       " ('filtering', 3.8918202981106265),\n",
       " ('final', 3.8918202981106265),\n",
       " ('follow', 3.8918202981106265),\n",
       " ('form', 3.8918202981106265),\n",
       " ('formed', 3.8918202981106265),\n",
       " ('forms', 3.8918202981106265),\n",
       " ('four', 3.8918202981106265),\n",
       " ('franca', 3.8918202981106265),\n",
       " ('frequent', 3.8918202981106265),\n",
       " ('funding', 3.8918202981106265),\n",
       " ('future', 3.8918202981106265),\n",
       " ('general', 3.8918202981106265),\n",
       " ('generate', 3.8918202981106265),\n",
       " ('generation', 3.8918202981106265),\n",
       " ('genuine', 3.8918202981106265),\n",
       " ('glossed', 3.8918202981106265),\n",
       " ('glossing', 3.8918202981106265),\n",
       " ('gospel', 3.8918202981106265),\n",
       " ('guarantee', 3.8918202981106265),\n",
       " ('guaranteed', 3.8918202981106265),\n",
       " ('had', 3.8918202981106265),\n",
       " ('hand', 3.8918202981106265),\n",
       " ('has', 3.8918202981106265),\n",
       " ('high', 3.8918202981106265),\n",
       " ('highest', 3.8918202981106265),\n",
       " ('hope', 3.8918202981106265),\n",
       " ('iS', 3.8918202981106265),\n",
       " ('identification', 3.8918202981106265),\n",
       " ('identifying', 3.8918202981106265),\n",
       " ('iff', 3.8918202981106265),\n",
       " ('immediately', 3.8918202981106265),\n",
       " ('impact', 3.8918202981106265),\n",
       " ('include', 3.8918202981106265),\n",
       " ('incorporating', 3.8918202981106265),\n",
       " ('increasing', 3.8918202981106265),\n",
       " ('indeed', 3.8918202981106265),\n",
       " ('indeterminate', 3.8918202981106265),\n",
       " ('individual', 3.8918202981106265),\n",
       " ('inflected', 3.8918202981106265),\n",
       " ('information', 3.8918202981106265),\n",
       " ('initial', 3.8918202981106265),\n",
       " ('instance', 3.8918202981106265),\n",
       " ('inverted', 3.8918202981106265),\n",
       " ('involving', 3.8918202981106265),\n",
       " ('issue', 3.8918202981106265),\n",
       " ('itemset', 3.8918202981106265),\n",
       " ('iteration', 3.8918202981106265),\n",
       " ('ja', 3.8918202981106265),\n",
       " ('judged', 3.8918202981106265),\n",
       " ('knowledge', 3.8918202981106265),\n",
       " ('ko', 3.8918202981106265),\n",
       " ('lNl', 3.8918202981106265),\n",
       " ('label', 3.8918202981106265),\n",
       " ('learned', 3.8918202981106265),\n",
       " ('learnedAvg', 3.8918202981106265),\n",
       " ('lemmas', 3.8918202981106265),\n",
       " ('length', 3.8918202981106265),\n",
       " ('lengthen3', 3.8918202981106265),\n",
       " ('lexemes', 3.8918202981106265),\n",
       " ('lexicographer', 3.8918202981106265),\n",
       " ('lexicons', 3.8918202981106265),\n",
       " ('line', 3.8918202981106265),\n",
       " ('lines', 3.8918202981106265),\n",
       " ('lingua', 3.8918202981106265),\n",
       " ('link', 3.8918202981106265),\n",
       " ('list', 3.8918202981106265),\n",
       " ('lists', 3.8918202981106265),\n",
       " ('little', 3.8918202981106265),\n",
       " ('local', 3.8918202981106265),\n",
       " ('locate', 3.8918202981106265),\n",
       " ('log', 3.8918202981106265),\n",
       " ('longest', 3.8918202981106265),\n",
       " ('manual', 3.8918202981106265),\n",
       " ('mature', 3.8918202981106265),\n",
       " ('maxlPl', 3.8918202981106265),\n",
       " ('may', 3.8918202981106265),\n",
       " ('mean', 3.8918202981106265),\n",
       " ('million', 3.8918202981106265),\n",
       " ('minus', 3.8918202981106265),\n",
       " ('mix', 3.8918202981106265),\n",
       " ('mono', 3.8918202981106265),\n",
       " ('most', 3.8918202981106265),\n",
       " ('multi', 3.8918202981106265),\n",
       " ('naively', 3.8918202981106265),\n",
       " ('name', 3.8918202981106265),\n",
       " ('namely', 3.8918202981106265),\n",
       " ('needs', 3.8918202981106265),\n",
       " ('new', 3.8918202981106265),\n",
       " ('noodle', 3.8918202981106265),\n",
       " ('noting', 3.8918202981106265),\n",
       " ('nouns', 3.8918202981106265),\n",
       " ('novel', 3.8918202981106265),\n",
       " ('obtained', 3.8918202981106265),\n",
       " ('obvious', 3.8918202981106265),\n",
       " ('occurs', 3.8918202981106265),\n",
       " ('optimisation', 3.8918202981106265),\n",
       " ('order', 3.8918202981106265),\n",
       " ('org', 3.8918202981106265),\n",
       " ('otherwise', 3.8918202981106265),\n",
       " ('out', 3.8918202981106265),\n",
       " ('outside', 3.8918202981106265),\n",
       " ('pages', 3.8918202981106265),\n",
       " ('pair', 3.8918202981106265),\n",
       " ('paired', 3.8918202981106265),\n",
       " ('pairing', 3.8918202981106265),\n",
       " ('panlex', 3.8918202981106265),\n",
       " ('paper', 3.8918202981106265),\n",
       " ('particular', 3.8918202981106265),\n",
       " ('perform', 3.8918202981106265),\n",
       " ('performed', 3.8918202981106265),\n",
       " ('possible', 3.8918202981106265),\n",
       " ('predict', 3.8918202981106265),\n",
       " ('predictable', 3.8918202981106265),\n",
       " ('predominantly', 3.8918202981106265),\n",
       " ('preferred', 3.8918202981106265),\n",
       " ('presented', 3.8918202981106265),\n",
       " ('prevalence', 3.8918202981106265),\n",
       " ('previous', 3.8918202981106265),\n",
       " ('prior', 3.8918202981106265),\n",
       " ('priori', 3.8918202981106265),\n",
       " ('proceeded', 3.8918202981106265),\n",
       " ('proportion', 3.8918202981106265),\n",
       " ('proportions', 3.8918202981106265),\n",
       " ('pruned', 3.8918202981106265),\n",
       " ('pt', 3.8918202981106265),\n",
       " ('question', 3.8918202981106265),\n",
       " ('randomly', 3.8918202981106265),\n",
       " ('range', 3.8918202981106265),\n",
       " ('ranking', 3.8918202981106265),\n",
       " ('realistically', 3.8918202981106265),\n",
       " ('recall', 3.8918202981106265),\n",
       " ('records', 3.8918202981106265),\n",
       " ('regular', 3.8918202981106265),\n",
       " ('reject', 3.8918202981106265),\n",
       " ('relative', 3.8918202981106265),\n",
       " ('removed', 3.8918202981106265),\n",
       " ('respectively', 3.8918202981106265),\n",
       " ('restricting', 3.8918202981106265),\n",
       " ('resultant', 3.8918202981106265),\n",
       " ('retrieve', 3.8918202981106265),\n",
       " ('retrieved', 3.8918202981106265),\n",
       " ('returns', 3.8918202981106265),\n",
       " ('reviewers', 3.8918202981106265),\n",
       " ('said', 3.8918202981106265),\n",
       " ('same', 3.8918202981106265),\n",
       " ('satisfying', 3.8918202981106265),\n",
       " ('scope', 3.8918202981106265),\n",
       " ('sdictwi', 3.8918202981106265),\n",
       " ('seeks', 3.8918202981106265),\n",
       " ('segmentation', 3.8918202981106265),\n",
       " ('select', 3.8918202981106265),\n",
       " ('selected', 3.8918202981106265),\n",
       " ('selective', 3.8918202981106265),\n",
       " ('similarity', 3.8918202981106265),\n",
       " ('simple', 3.8918202981106265),\n",
       " ('site', 3.8918202981106265),\n",
       " ('small', 3.8918202981106265),\n",
       " ('smallerLangDictsMAPBaselineen0', 3.8918202981106265),\n",
       " ('smallerLangDictsMAPBaselinezh0', 3.8918202981106265),\n",
       " ('smallerLangWikipedia', 3.8918202981106265),\n",
       " ('smallerLanguageProportionen', 3.8918202981106265),\n",
       " ('so', 3.8918202981106265),\n",
       " ('sophistication', 3.8918202981106265),\n",
       " ('source', 3.8918202981106265),\n",
       " ('sourced', 3.8918202981106265),\n",
       " ('specialised', 3.8918202981106265),\n",
       " ('specific', 3.8918202981106265),\n",
       " ('specifically', 3.8918202981106265),\n",
       " ('strong', 3.8918202981106265),\n",
       " ('subset', 3.8918202981106265),\n",
       " ('successful', 3.8918202981106265),\n",
       " ('suggest', 3.8918202981106265),\n",
       " ('suited', 3.8918202981106265),\n",
       " ('supported', 3.8918202981106265),\n",
       " ('sushi', 3.8918202981106265),\n",
       " ('suspect', 3.8918202981106265),\n",
       " ('system', 3.8918202981106265),\n",
       " ('systematicity', 3.8918202981106265),\n",
       " ('task', 3.8918202981106265),\n",
       " ('ten', 3.8918202981106265),\n",
       " ('tend', 3.8918202981106265),\n",
       " ('terminology', 3.8918202981106265),\n",
       " ('text', 3.8918202981106265),\n",
       " ('thank', 3.8918202981106265),\n",
       " ('them', 3.8918202981106265),\n",
       " ('theoretically', 3.8918202981106265),\n",
       " ('three', 3.8918202981106265),\n",
       " ('threshold', 3.8918202981106265),\n",
       " ('thus', 3.8918202981106265),\n",
       " ('together', 3.8918202981106265),\n",
       " ('towards', 3.8918202981106265),\n",
       " ('transliterated', 3.8918202981106265),\n",
       " ('typicality', 3.8918202981106265),\n",
       " ('underlying', 3.8918202981106265),\n",
       " ('unique', 3.8918202981106265),\n",
       " ('until', 3.8918202981106265),\n",
       " ('usable', 3.8918202981106265),\n",
       " ('valuable', 3.8918202981106265),\n",
       " ('value', 3.8918202981106265),\n",
       " ('values', 3.8918202981106265),\n",
       " ('varies', 3.8918202981106265),\n",
       " ('verbs', 3.8918202981106265),\n",
       " ('vocabulary', 3.8918202981106265),\n",
       " ('want', 3.8918202981106265),\n",
       " ('way', 3.8918202981106265),\n",
       " ('ways', 3.8918202981106265),\n",
       " ('weighing', 3.8918202981106265),\n",
       " ('weight', 3.8918202981106265),\n",
       " ('whereas', 3.8918202981106265),\n",
       " ('whereby', 3.8918202981106265),\n",
       " ('while', 3.8918202981106265),\n",
       " ('wide', 3.8918202981106265),\n",
       " ('wiotherwisewhere', 3.8918202981106265),\n",
       " ('wish', 3.8918202981106265),\n",
       " ('within', 3.8918202981106265),\n",
       " ('wj', 3.8918202981106265),\n",
       " ('wn', 3.8918202981106265),\n",
       " ('would', 3.8918202981106265),\n",
       " ('zero', 3.8918202981106265),\n",
       " ('zh', 3.8918202981106265),\n",
       " ('1000', 3.1986731175506815),\n",
       " ('2009', 3.1986731175506815),\n",
       " ('32', 3.1986731175506815),\n",
       " ('49', 3.1986731175506815),\n",
       " ('50', 3.1986731175506815),\n",
       " ('52', 3.1986731175506815),\n",
       " ('770', 3.1986731175506815),\n",
       " ('920', 3.1986731175506815),\n",
       " ('API', 3.1986731175506815),\n",
       " ('Arabic', 3.1986731175506815),\n",
       " ('Below', 3.1986731175506815),\n",
       " ('Chinese', 3.1986731175506815),\n",
       " ('Dictionary', 3.1986731175506815),\n",
       " ('Dicts', 3.1986731175506815),\n",
       " ('Freedict', 3.1986731175506815),\n",
       " ('German', 3.1986731175506815),\n",
       " ('Italian', 3.1986731175506815),\n",
       " ('Score', 3.1986731175506815),\n",
       " ('Spanish', 3.1986731175506815),\n",
       " ('To', 3.1986731175506815),\n",
       " ('While', 3.1986731175506815),\n",
       " ('access', 3.1986731175506815),\n",
       " ('also', 3.1986731175506815),\n",
       " ('although', 3.1986731175506815),\n",
       " ('analysis', 3.1986731175506815),\n",
       " ('any', 3.1986731175506815),\n",
       " ('ar', 3.1986731175506815),\n",
       " ('assume', 3.1986731175506815),\n",
       " ('average', 3.1986731175506815),\n",
       " ('background', 3.1986731175506815),\n",
       " ('best', 3.1986731175506815),\n",
       " ('can', 3.1986731175506815),\n",
       " ('certain', 3.1986731175506815),\n",
       " ('comparable', 3.1986731175506815),\n",
       " ('component', 3.1986731175506815),\n",
       " ('construct', 3.1986731175506815),\n",
       " ('contained', 3.1986731175506815),\n",
       " ('contains', 3.1986731175506815),\n",
       " ('corpora', 3.1986731175506815),\n",
       " ('corpus', 3.1986731175506815),\n",
       " ('coverage', 3.1986731175506815),\n",
       " ('crosslingual', 3.1986731175506815),\n",
       " ('data', 3.1986731175506815),\n",
       " ('detail', 3.1986731175506815),\n",
       " ('detection', 3.1986731175506815),\n",
       " ('determination', 3.1986731175506815),\n",
       " ('developers', 3.1986731175506815),\n",
       " ('direct', 3.1986731175506815),\n",
       " ('discriminating', 3.1986731175506815),\n",
       " ('domain', 3.1986731175506815),\n",
       " ('effectiveness', 3.1986731175506815),\n",
       " ('empty', 3.1986731175506815),\n",
       " ('engine', 3.1986731175506815),\n",
       " ('es', 3.1986731175506815),\n",
       " ('evaluate', 3.1986731175506815),\n",
       " ('evaluation', 3.1986731175506815),\n",
       " ('exhaustively', 3.1986731175506815),\n",
       " ('existing', 3.1986731175506815),\n",
       " ('far', 3.1986731175506815),\n",
       " ('features', 3.1986731175506815),\n",
       " ('follows', 3.1986731175506815),\n",
       " ('function', 3.1986731175506815),\n",
       " ('generally', 3.1986731175506815),\n",
       " ('greater', 3.1986731175506815),\n",
       " ('included', 3.1986731175506815),\n",
       " ('index', 3.1986731175506815),\n",
       " ('indexed', 3.1986731175506815),\n",
       " ('indicated', 3.1986731175506815),\n",
       " ('injected', 3.1986731175506815),\n",
       " ('interest', 3.1986731175506815),\n",
       " ('item', 3.1986731175506815),\n",
       " ('its', 3.1986731175506815),\n",
       " ('largely', 3.1986731175506815),\n",
       " ('lexicon', 3.1986731175506815),\n",
       " ('likely', 3.1986731175506815),\n",
       " ('lower', 3.1986731175506815),\n",
       " ('methodology', 3.1986731175506815),\n",
       " ('mining', 3.1986731175506815),\n",
       " ('mixture', 3.1986731175506815),\n",
       " ('modified', 3.1986731175506815),\n",
       " ('monolingual', 3.1986731175506815),\n",
       " ('need', 3.1986731175506815),\n",
       " ('next', 3.1986731175506815),\n",
       " ('original', 3.1986731175506815),\n",
       " ('others', 3.1986731175506815),\n",
       " ('patterns', 3.1986731175506815),\n",
       " ('precision', 3.1986731175506815),\n",
       " ('prefer', 3.1986731175506815),\n",
       " ('ranked', 3.1986731175506815),\n",
       " ('rather', 3.1986731175506815),\n",
       " ('relatively', 3.1986731175506815),\n",
       " ('resources', 3.1986731175506815),\n",
       " ('return', 3.1986731175506815),\n",
       " ('score', 3.1986731175506815),\n",
       " ('sdict', 3.1986731175506815),\n",
       " ('selection', 3.1986731175506815),\n",
       " ('sets', 3.1986731175506815),\n",
       " ('should', 3.1986731175506815),\n",
       " ('simply', 3.1986731175506815),\n",
       " ('single', 3.1986731175506815),\n",
       " ('some', 3.1986731175506815),\n",
       " ('step', 3.1986731175506815),\n",
       " ('structural', 3.1986731175506815),\n",
       " ('style', 3.1986731175506815),\n",
       " ('substantially', 3.1986731175506815),\n",
       " ('targeted', 3.1986731175506815),\n",
       " ('technical', 3.1986731175506815),\n",
       " ('then', 3.1986731175506815),\n",
       " ('thesauri', 3.1986731175506815),\n",
       " ('they', 3.1986731175506815),\n",
       " ('third', 3.1986731175506815),\n",
       " ('total', 3.1986731175506815),\n",
       " ('train', 3.1986731175506815),\n",
       " ('two', 3.1986731175506815),\n",
       " ('type', 3.1986731175506815),\n",
       " ('underestimate', 3.1986731175506815),\n",
       " ('use', 3.1986731175506815),\n",
       " ('variety', 3.1986731175506815),\n",
       " ('very', 3.1986731175506815),\n",
       " ('via', 3.1986731175506815),\n",
       " ('well', 3.1986731175506815),\n",
       " ('what', 3.1986731175506815),\n",
       " ('Apriori', 2.793208009442517),\n",
       " ('Dl', 2.793208009442517),\n",
       " ('Japanese', 2.793208009442517),\n",
       " ('Ll', 2.793208009442517),\n",
       " ('ability', 2.793208009442517),\n",
       " ('against', 2.793208009442517),\n",
       " ('algorithm', 2.793208009442517),\n",
       " ('approach', 2.793208009442517),\n",
       " ('articles', 2.793208009442517),\n",
       " ('assumption', 2.793208009442517),\n",
       " ('at', 2.793208009442517),\n",
       " ('automatically', 2.793208009442517),\n",
       " ('baseline', 2.793208009442517),\n",
       " ('been', 2.793208009442517),\n",
       " ('both', 2.793208009442517),\n",
       " ('but', 2.793208009442517),\n",
       " ('classification', 2.793208009442517),\n",
       " ('co', 2.793208009442517),\n",
       " ('combination', 2.793208009442517),\n",
       " ('content', 2.793208009442517),\n",
       " ('conventional', 2.793208009442517),\n",
       " ('criterion', 2.793208009442517),\n",
       " ('de', 2.793208009442517),\n",
       " ('different', 2.793208009442517),\n",
       " ('experiments', 2.793208009442517),\n",
       " ('few', 2.793208009442517),\n",
       " ('formulation', 2.793208009442517),\n",
       " ('fr', 2.793208009442517),\n",
       " ('higher', 2.793208009442517),\n",
       " ('highly', 2.793208009442517),\n",
       " ('including', 2.793208009442517),\n",
       " ('into', 2.793208009442517),\n",
       " ('intuition', 2.793208009442517),\n",
       " ('least', 2.793208009442517),\n",
       " ('low', 2.793208009442517),\n",
       " ('occur', 2.793208009442517),\n",
       " ('often', 2.793208009442517),\n",
       " ('one', 2.793208009442517),\n",
       " ('pairs', 2.793208009442517),\n",
       " ('parallel', 2.793208009442517),\n",
       " ('present', 2.793208009442517),\n",
       " ('proposed', 2.793208009442517),\n",
       " ('querying', 2.793208009442517),\n",
       " ('random', 2.793208009442517),\n",
       " ('second', 2.793208009442517),\n",
       " ('size', 2.793208009442517),\n",
       " ('standard', 2.793208009442517),\n",
       " ('support', 2.793208009442517),\n",
       " ('their', 2.793208009442517),\n",
       " ('those', 2.793208009442517),\n",
       " ('true', 2.793208009442517),\n",
       " ('used', 2.793208009442517),\n",
       " ('was', 2.793208009442517),\n",
       " ('whether', 2.793208009442517),\n",
       " ('As', 2.505525936990736),\n",
       " ('ClueWeb09', 2.505525936990736),\n",
       " ('Google', 2.505525936990736),\n",
       " ('This', 2.505525936990736),\n",
       " ('combinations', 2.505525936990736),\n",
       " ('contain', 2.505525936990736),\n",
       " ('could', 2.505525936990736),\n",
       " ('en', 2.505525936990736),\n",
       " ('first', 2.505525936990736),\n",
       " ('genre', 2.505525936990736),\n",
       " ('identify', 2.505525936990736),\n",
       " ('if', 2.505525936990736),\n",
       " ('lexical', 2.505525936990736),\n",
       " ('likelihood', 2.505525936990736),\n",
       " ('methods', 2.505525936990736),\n",
       " ('more', 2.505525936990736),\n",
       " ('occurrence', 2.505525936990736),\n",
       " ('other', 2.505525936990736),\n",
       " ('pre', 2.505525936990736),\n",
       " ('relevant', 2.505525936990736),\n",
       " ('scores', 2.505525936990736),\n",
       " ('term', 2.505525936990736),\n",
       " ('where', 2.505525936990736),\n",
       " ('work', 2.505525936990736),\n",
       " ('For', 2.2823823856765264),\n",
       " ('In', 2.2823823856765264),\n",
       " ('MAP', 2.2823823856765264),\n",
       " ('Table', 2.2823823856765264),\n",
       " ('being', 2.2823823856765264),\n",
       " ('found', 2.2823823856765264),\n",
       " ('from', 2.2823823856765264),\n",
       " ('no', 2.2823823856765264),\n",
       " ('non', 2.2823823856765264),\n",
       " ('number', 2.2823823856765264),\n",
       " ('only', 2.2823823856765264),\n",
       " ('open', 2.2823823856765264),\n",
       " ('potentially', 2.2823823856765264),\n",
       " ('result', 2.2823823856765264),\n",
       " ('retrieval', 2.2823823856765264),\n",
       " ('search', 2.2823823856765264),\n",
       " ('such', 2.2823823856765264),\n",
       " ('target', 2.2823823856765264),\n",
       " ('terms', 2.2823823856765264),\n",
       " ('than', 2.2823823856765264),\n",
       " ('there', 2.2823823856765264),\n",
       " ('these', 2.2823823856765264),\n",
       " ('We', 2.1000608288825715),\n",
       " ('all', 2.1000608288825715),\n",
       " ('containing', 2.1000608288825715),\n",
       " ('it', 2.1000608288825715),\n",
       " ('or', 2.1000608288825715),\n",
       " ('over', 2.1000608288825715),\n",
       " ('research', 2.1000608288825715),\n",
       " ('returned', 2.1000608288825715),\n",
       " ('training', 2.1000608288825715),\n",
       " ('using', 2.1000608288825715),\n",
       " ('wi', 2.1000608288825715),\n",
       " ('Wikipedia', 1.9459101490553132),\n",
       " ('an', 1.9459101490553132),\n",
       " ('collection', 1.9459101490553132),\n",
       " ('construction', 1.9459101490553132),\n",
       " ('set', 1.9459101490553132),\n",
       " ('were', 1.9459101490553132),\n",
       " ('English', 1.8123787564307907),\n",
       " ('bilingual', 1.8123787564307907),\n",
       " ('have', 1.8123787564307907),\n",
       " ('word', 1.8123787564307907),\n",
       " ('words', 1.8123787564307907),\n",
       " ('by', 1.6945957207744073),\n",
       " ('queries', 1.6945957207744073),\n",
       " ('be', 1.589235205116581),\n",
       " ('each', 1.589235205116581),\n",
       " ('given', 1.589235205116581),\n",
       " ('method', 1.589235205116581),\n",
       " ('query', 1.589235205116581),\n",
       " ('synthetic', 1.589235205116581),\n",
       " ('not', 1.493925025312256),\n",
       " ('this', 1.493925025312256),\n",
       " ('web', 1.493925025312256),\n",
       " ('which', 1.493925025312256),\n",
       " ('with', 1.493925025312256),\n",
       " ('based', 1.4069136483226263),\n",
       " ('dataset', 1.4069136483226263),\n",
       " ('results', 1.32687094064909),\n",
       " ('as', 1.252762968495368),\n",
       " ('dictionary', 1.252762968495368),\n",
       " ('document', 1.252762968495368),\n",
       " ('The', 1.1837700970084166),\n",
       " ('our', 1.1837700970084166),\n",
       " ('on', 1.001448540214462),\n",
       " ('are', 0.9473813189441862),\n",
       " ('is', 0.8960880245566357),\n",
       " ('we', 0.8960880245566357),\n",
       " ('documents', 0.8472978603872037),\n",
       " ('languages', 0.8472978603872037),\n",
       " ('multilingual', 0.8472978603872037),\n",
       " ('language', 0.8007778447523106),\n",
       " ('to', 0.7137664677626809),\n",
       " ('dictionaries', 0.6337237600891445),\n",
       " ('that', 0.6337237600891445),\n",
       " ('for', 0.5245244681241527),\n",
       " ('in', 0.4906229164484712),\n",
       " ('and', 0.3953127366441464),\n",
       " ('of', 0.20294084399669038),\n",
       " ('the', 0.17824823140631876)]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Order the (token, score) pairs produced by InfoF.idf_f from highest to lowest score;\n",
    "# sorted() is stable, so equal scores keep the order idf_f yielded them in.\n",
    "sorted(InfoF().idf_f(_input), key=lambda pair: pair[1], reverse=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('work', 19.5),\n",
       " ('been', 15.599999999999998),\n",
       " ('classification', 15.599999999999998),\n",
       " ('content', 15.599999999999998),\n",
       " ('genre', 15.599999999999998),\n",
       " ('methods', 15.599999999999998),\n",
       " ('automatically', 11.7),\n",
       " ('parallel', 11.7),\n",
       " ('there', 10.931282051282052),\n",
       " ('document', 8.496503496503495),\n",
       " ('to', 8.358347578347576),\n",
       " ('research', 7.904273504273503),\n",
       " ('While', 7.799999999999999),\n",
       " ('although', 7.799999999999999),\n",
       " ('automatic', 7.799999999999999),\n",
       " ('corpora', 7.799999999999999),\n",
       " ('crosslingual', 7.799999999999999),\n",
       " ('detect', 7.799999999999999),\n",
       " ('detection', 7.799999999999999),\n",
       " ('features', 7.799999999999999),\n",
       " ('generally', 7.799999999999999),\n",
       " ('has', 7.799999999999999),\n",
       " ('mixture', 7.799999999999999),\n",
       " ('specific', 7.799999999999999),\n",
       " ('structural', 7.799999999999999),\n",
       " ('technical', 7.799999999999999),\n",
       " ('thesauri', 7.799999999999999),\n",
       " ('type', 7.799999999999999),\n",
       " ('identify', 7.339102564102564),\n",
       " ('proposed', 7.339102564102564),\n",
       " ('relevant', 7.339102564102564),\n",
       " ('on', 6.515384615384617),\n",
       " ('as', 5.506612685560054),\n",
       " ('bilingual', 5.3910256410256405),\n",
       " ('wi', 5.128205128205128),\n",
       " ('for', 4.9570638511814975),\n",
       " ('our', 4.871794871794872),\n",
       " ('from', 4.825128205128205),\n",
       " ('have', 4.312820512820513),\n",
       " ('but', 3.9521367521367514),\n",
       " ('construct', 3.9521367521367514),\n",
       " ('domain', 3.9521367521367514),\n",
       " ('its', 3.9521367521367514),\n",
       " ('low', 3.9521367521367514),\n",
       " ('used', 3.9521367521367514),\n",
       " ('Acknowledgements', 3.8999999999999995),\n",
       " ('Also', 3.8999999999999995),\n",
       " ('Alternatively', 3.8999999999999995),\n",
       " ('Australian', 3.8999999999999995),\n",
       " ('Council', 3.8999999999999995),\n",
       " ('Eight', 3.8999999999999995),\n",
       " ('Finally', 3.8999999999999995),\n",
       " ('Group', 3.8999999999999995),\n",
       " ('Here', 3.8999999999999995),\n",
       " ('Methods', 3.8999999999999995),\n",
       " ('Panlex', 3.8999999999999995),\n",
       " ('Parallel', 3.8999999999999995),\n",
       " ('Related', 3.8999999999999995),\n",
       " ('Research', 3.8999999999999995),\n",
       " ('Such', 3.8999999999999995),\n",
       " ('adapted', 3.8999999999999995),\n",
       " ('anonymous', 3.8999999999999995),\n",
       " ('applied', 3.8999999999999995),\n",
       " ('assistance', 3.8999999999999995),\n",
       " ('attained', 3.8999999999999995),\n",
       " ('aware', 3.8999999999999995),\n",
       " ('bias', 3.8999999999999995),\n",
       " ('broadly', 3.8999999999999995),\n",
       " ('categories', 3.8999999999999995),\n",
       " ('categorisation', 3.8999999999999995),\n",
       " ('classifies', 3.8999999999999995),\n",
       " ('collections', 3.8999999999999995),\n",
       " ('comments', 3.8999999999999995),\n",
       " ('commonly', 3.8999999999999995),\n",
       " ('course', 3.8999999999999995),\n",
       " ('credible', 3.8999999999999995),\n",
       " ('density', 3.8999999999999995),\n",
       " ('described', 3.8999999999999995),\n",
       " ('design', 3.8999999999999995),\n",
       " ('designed', 3.8999999999999995),\n",
       " ('detecting', 3.8999999999999995),\n",
       " ('develop', 3.8999999999999995),\n",
       " ('distributional', 3.8999999999999995),\n",
       " ('downside', 3.8999999999999995),\n",
       " ('exist', 3.8999999999999995),\n",
       " ('experiment', 3.8999999999999995),\n",
       " ('experimental', 3.8999999999999995),\n",
       " ('explore', 3.8999999999999995),\n",
       " ('extract', 3.8999999999999995),\n",
       " ('falls', 3.8999999999999995),\n",
       " ('filter', 3.8999999999999995),\n",
       " ('form', 3.8999999999999995),\n",
       " ('four', 3.8999999999999995),\n",
       " ('franca', 3.8999999999999995),\n",
       " ('funding', 3.8999999999999995),\n",
       " ('future', 3.8999999999999995),\n",
       " ('general', 3.8999999999999995),\n",
       " ('glossed', 3.8999999999999995),\n",
       " ('glossing', 3.8999999999999995),\n",
       " ('guarantee', 3.8999999999999995),\n",
       " ('high', 3.8999999999999995),\n",
       " ('hope', 3.8999999999999995),\n",
       " ('identification', 3.8999999999999995),\n",
       " ('identifying', 3.8999999999999995),\n",
       " ('immediately', 3.8999999999999995),\n",
       " ('initial', 3.8999999999999995),\n",
       " ('judged', 3.8999999999999995),\n",
       " ('label', 3.8999999999999995),\n",
       " ('lines', 3.8999999999999995),\n",
       " ('lingua', 3.8999999999999995),\n",
       " ('lists', 3.8999999999999995),\n",
       " ('locate', 3.8999999999999995),\n",
       " ('mix', 3.8999999999999995),\n",
       " ('multi', 3.8999999999999995),\n",
       " ('namely', 3.8999999999999995),\n",
       " ('novel', 3.8999999999999995),\n",
       " ('obvious', 3.8999999999999995),\n",
       " ('particular', 3.8999999999999995),\n",
       " ('possible', 3.8999999999999995),\n",
       " ('predictable', 3.8999999999999995),\n",
       " ('proceeded', 3.8999999999999995),\n",
       " ('realistically', 3.8999999999999995),\n",
       " ('recall', 3.8999999999999995),\n",
       " ('reviewers', 3.8999999999999995),\n",
       " ('same', 3.8999999999999995),\n",
       " ('seeks', 3.8999999999999995),\n",
       " ('similarity', 3.8999999999999995),\n",
       " ('site', 3.8999999999999995),\n",
       " ('source', 3.8999999999999995),\n",
       " ('specifically', 3.8999999999999995),\n",
       " ('strong', 3.8999999999999995),\n",
       " ('suited', 3.8999999999999995),\n",
       " ('supported', 3.8999999999999995),\n",
       " ('task', 3.8999999999999995),\n",
       " ('thank', 3.8999999999999995),\n",
       " ('theoretically', 3.8999999999999995),\n",
       " ('towards', 3.8999999999999995),\n",
       " ('valuable', 3.8999999999999995),\n",
       " ('wish', 3.8999999999999995),\n",
       " ('would', 3.8999999999999995),\n",
       " ('language', 3.882248520710059),\n",
       " ('each', 3.8461538461538467),\n",
       " ('based', 3.544615384615384),\n",
       " ('in', 3.3524216524216524),\n",
       " ('not', 3.338675213675213),\n",
       " ('queries', 3.3333333333333335),\n",
       " ('query', 3.3333333333333335),\n",
       " ('we', 3.0333333333333337),\n",
       " ('words', 2.8205128205128203),\n",
       " ('the', 2.685621301775149),\n",
       " ('dictionaries', 2.577514792899408),\n",
       " ('en', 2.564102564102564),\n",
       " ('or', 2.1717948717948716),\n",
       " ('This', 2.1564102564102563),\n",
       " ('could', 2.1564102564102563),\n",
       " ('pre', 2.1564102564102563),\n",
       " ('Dl', 2.0512820512820515),\n",
       " ('collection', 2.0512820512820515),\n",
       " ('set', 2.0512820512820515),\n",
       " ('web', 2.0192307692307687),\n",
       " ('The', 1.9745192307692307),\n",
       " ('Ll', 1.7948717948717952),\n",
       " ('returned', 1.7948717948717952),\n",
       " ('than', 1.7948717948717952),\n",
       " ('were', 1.7948717948717952),\n",
       " ('method', 1.5427350427350421),\n",
       " ('Apriori', 1.5384615384615388),\n",
       " ('ClueWeb09', 1.5384615384615388),\n",
       " ('For', 1.5384615384615388),\n",
       " ('Table', 1.5384615384615388),\n",
       " ('dataset', 1.5166666666666668),\n",
       " ('over', 1.438782051282051),\n",
       " ('MAP', 1.282051282051282),\n",
       " ('co', 1.282051282051282),\n",
       " ('first', 1.282051282051282),\n",
       " ('found', 1.282051282051282),\n",
       " ('lexical', 1.282051282051282),\n",
       " ('non', 1.282051282051282),\n",
       " ('number', 1.282051282051282),\n",
       " ('only', 1.282051282051282),\n",
       " ('result', 1.282051282051282),\n",
       " ('retrieval', 1.282051282051282),\n",
       " ('target', 1.282051282051282),\n",
       " ('is', 1.2385256410256413),\n",
       " ('by', 1.2341880341880338),\n",
       " ('being', 1.1815384615384616),\n",
       " ('more', 1.1815384615384616),\n",
       " ('term', 1.1815384615384616),\n",
       " ('these', 1.1815384615384616),\n",
       " ('dictionary', 1.1422402159244265),\n",
       " ('also', 1.0782051282051281),\n",
       " ('corpus', 1.0782051282051281),\n",
       " ('developers', 1.0782051282051281),\n",
       " ('direct', 1.0782051282051281),\n",
       " ('engine', 1.0782051282051281),\n",
       " ('exhaustively', 1.0782051282051281),\n",
       " ('existing', 1.0782051282051281),\n",
       " ('far', 1.0782051282051281),\n",
       " ('mining', 1.0782051282051281),\n",
       " ('need', 1.0782051282051281),\n",
       " ('patterns', 1.0782051282051281),\n",
       " ('rather', 1.0782051282051281),\n",
       " ('relatively', 1.0782051282051281),\n",
       " ('sets', 1.0782051282051281),\n",
       " ('should', 1.0782051282051281),\n",
       " ('single', 1.0782051282051281),\n",
       " ('style', 1.0782051282051281),\n",
       " ('train', 1.0782051282051281),\n",
       " ('variety', 1.0782051282051281),\n",
       " ('very', 1.0782051282051281),\n",
       " ('well', 1.0782051282051281),\n",
       " ('what', 1.0782051282051281),\n",
       " ('As', 1.0256410256410258),\n",
       " ('Google', 1.0256410256410258),\n",
       " ('algorithm', 1.0256410256410258),\n",
       " ('approach', 1.0256410256410258),\n",
       " ('at', 1.0256410256410258),\n",
       " ('combinations', 1.0256410256410258),\n",
       " ('if', 1.0256410256410258),\n",
       " ('likelihood', 1.0256410256410258),\n",
       " ('occurrence', 1.0256410256410258),\n",
       " ('scores', 1.0256410256410258),\n",
       " ('support', 1.0256410256410258),\n",
       " ('wj', 1.0256410256410258),\n",
       " ('results', 0.9578347578347578),\n",
       " ('with', 0.9578347578347578),\n",
       " ('languages', 0.924444444444445),\n",
       " ('synthetic', 0.8673659673659673),\n",
       " ('word', 0.8673659673659673),\n",
       " ('be', 0.8591715976331361),\n",
       " ('which', 0.8591715976331361),\n",
       " ('770', 0.7692307692307694),\n",
       " ('Japanese', 0.7692307692307694),\n",
       " ('against', 0.7692307692307694),\n",
       " ('any', 0.7692307692307694),\n",
       " ('average', 0.7692307692307694),\n",
       " ('baseline', 0.7692307692307694),\n",
       " ('certain', 0.7692307692307694),\n",
       " ('criterion', 0.7692307692307694),\n",
       " ('de', 0.7692307692307694),\n",
       " ('evaluation', 0.7692307692307694),\n",
       " ('experiments', 0.7692307692307694),\n",
       " ('formulation', 0.7692307692307694),\n",
       " ('fr', 0.7692307692307694),\n",
       " ('higher', 0.7692307692307694),\n",
       " ('including', 0.7692307692307694),\n",
       " ('intuition', 0.7692307692307694),\n",
       " ('item', 0.7692307692307694),\n",
       " ('learned', 0.7692307692307694),\n",
       " ('least', 0.7692307692307694),\n",
       " ('lexicon', 0.7692307692307694),\n",
       " ('occur', 0.7692307692307694),\n",
       " ('one', 0.7692307692307694),\n",
       " ('original', 0.7692307692307694),\n",
       " ('performed', 0.7692307692307694),\n",
       " ('querying', 0.7692307692307694),\n",
       " ('random', 0.7692307692307694),\n",
       " ('sdict', 0.7692307692307694),\n",
       " ('second', 0.7692307692307694),\n",
       " ('size', 0.7692307692307694),\n",
       " ('standard', 0.7692307692307694),\n",
       " ('those', 0.7692307692307694),\n",
       " ('true', 0.7692307692307694),\n",
       " ('two', 0.7692307692307694),\n",
       " ('whether', 0.7692307692307694),\n",
       " ('such', 0.6170940170940169),\n",
       " ('terms', 0.6170940170940169),\n",
       " ('10', 0.5128205128205129),\n",
       " ('100', 0.5128205128205129),\n",
       " ('1000', 0.5128205128205129),\n",
       " ('13', 0.5128205128205129),\n",
       " ('2009', 0.5128205128205129),\n",
       " ('32', 0.5128205128205129),\n",
       " ('49', 0.5128205128205129),\n",
       " ('50', 0.5128205128205129),\n",
       " ('52', 0.5128205128205129),\n",
       " ('62', 0.5128205128205129),\n",
       " ('63', 0.5128205128205129),\n",
       " ('920', 0.5128205128205129),\n",
       " ('API', 0.5128205128205129),\n",
       " ('Arabic', 0.5128205128205129),\n",
       " ('Below', 0.5128205128205129),\n",
       " ('Chinese', 0.5128205128205129),\n",
       " ('Dictionary', 0.5128205128205129),\n",
       " ('Dicts', 0.5128205128205129),\n",
       " ('Figure', 0.5128205128205129),\n",
       " ('Freedict', 0.5128205128205129),\n",
       " ('German', 0.5128205128205129),\n",
       " ('Italian', 0.5128205128205129),\n",
       " ('Korean', 0.5128205128205129),\n",
       " ('Portuguese', 0.5128205128205129),\n",
       " ('Score', 0.5128205128205129),\n",
       " ('Spanish', 0.5128205128205129),\n",
       " ('To', 0.5128205128205129),\n",
       " ('access', 0.5128205128205129),\n",
       " ('ar', 0.5128205128205129),\n",
       " ('assume', 0.5128205128205129),\n",
       " ('background', 0.5128205128205129),\n",
       " ('best', 0.5128205128205129),\n",
       " ('cod', 0.5128205128205129),\n",
       " ('component', 0.5128205128205129),\n",
       " ('contained', 0.5128205128205129),\n",
       " ('contains', 0.5128205128205129),\n",
       " ('coverage', 0.5128205128205129),\n",
       " ('data', 0.5128205128205129),\n",
       " ('detail', 0.5128205128205129),\n",
       " ('determination', 0.5128205128205129),\n",
       " ('discriminating', 0.5128205128205129),\n",
       " ('effectiveness', 0.5128205128205129),\n",
       " ('efficiently', 0.5128205128205129),\n",
       " ('empty', 0.5128205128205129),\n",
       " ('es', 0.5128205128205129),\n",
       " ('evaluate', 0.5128205128205129),\n",
       " ('follows', 0.5128205128205129),\n",
       " ('function', 0.5128205128205129),\n",
       " ('greater', 0.5128205128205129),\n",
       " ('included', 0.5128205128205129),\n",
       " ('index', 0.5128205128205129),\n",
       " ('indexed', 0.5128205128205129),\n",
       " ('indicated', 0.5128205128205129),\n",
       " ('injected', 0.5128205128205129),\n",
       " ('interest', 0.5128205128205129),\n",
       " ('iteration', 0.5128205128205129),\n",
       " ('largely', 0.5128205128205129),\n",
       " ('likely', 0.5128205128205129),\n",
       " ('lower', 0.5128205128205129),\n",
       " ('may', 0.5128205128205129),\n",
       " ('mean', 0.5128205128205129),\n",
       " ('methodology', 0.5128205128205129),\n",
       " ('modified', 0.5128205128205129),\n",
       " ('monolingual', 0.5128205128205129),\n",
       " ('next', 0.5128205128205129),\n",
       " ('others', 0.5128205128205129),\n",
       " ('prefer', 0.5128205128205129),\n",
       " ('proportions', 0.5128205128205129),\n",
       " ('ranked', 0.5128205128205129),\n",
       " ('records', 0.5128205128205129),\n",
       " ('resources', 0.5128205128205129),\n",
       " ('return', 0.5128205128205129),\n",
       " ('score', 0.5128205128205129),\n",
       " ('selection', 0.5128205128205129),\n",
       " ('simply', 0.5128205128205129),\n",
       " ('some', 0.5128205128205129),\n",
       " ('step', 0.5128205128205129),\n",
       " ('substantially', 0.5128205128205129),\n",
       " ('targeted', 0.5128205128205129),\n",
       " ('tend', 0.5128205128205129),\n",
       " ('then', 0.5128205128205129),\n",
       " ('they', 0.5128205128205129),\n",
       " ('third', 0.5128205128205129),\n",
       " ('total', 0.5128205128205129),\n",
       " ('underestimate', 0.5128205128205129),\n",
       " ('unique', 0.5128205128205129),\n",
       " ('use', 0.5128205128205129),\n",
       " ('via', 0.5128205128205129),\n",
       " ('weight', 0.5128205128205129),\n",
       " ('English', 0.4789173789173789),\n",
       " ('Wikipedia', 0.4789173789173789),\n",
       " ('are', 0.40394477317554245),\n",
       " ('multilingual', 0.3634615384615383),\n",
       " ('ability', 0.30854700854700845),\n",
       " ('analysis', 0.30854700854700845),\n",
       " ('articles', 0.30854700854700845),\n",
       " ('assumption', 0.30854700854700845),\n",
       " ('both', 0.30854700854700845),\n",
       " ('can', 0.30854700854700845),\n",
       " ('combination', 0.30854700854700845),\n",
       " ('comparable', 0.30854700854700845),\n",
       " ('conventional', 0.30854700854700845),\n",
       " ('different', 0.30854700854700845),\n",
       " ('few', 0.30854700854700845),\n",
       " ('highly', 0.30854700854700845),\n",
       " ('into', 0.30854700854700845),\n",
       " ('often', 0.30854700854700845),\n",
       " ('pairs', 0.30854700854700845),\n",
       " ('precision', 0.30854700854700845),\n",
       " ('present', 0.30854700854700845),\n",
       " ('was', 0.30854700854700845),\n",
       " ('We', 0.2871794871794871),\n",
       " ('all', 0.2871794871794871),\n",
       " ('it', 0.2871794871794871),\n",
       " ('00', 0.25641025641025644),\n",
       " ('000', 0.25641025641025644),\n",
       " ('00Average', 0.25641025641025644),\n",
       " ('00de0', 0.25641025641025644),\n",
       " ('00es0', 0.25641025641025644),\n",
       " ('00zh0', 0.25641025641025644),\n",
       " ('01ar0', 0.25641025641025644),\n",
       " ('02', 0.25641025641025644),\n",
       " ('03it0', 0.25641025641025644),\n",
       " ('04Table', 0.25641025641025644),\n",
       " ('04de0', 0.25641025641025644),\n",
       " ('04ja0', 0.25641025641025644),\n",
       " ('05', 0.25641025641025644),\n",
       " ('08fr0', 0.25641025641025644),\n",
       " ('08it0', 0.25641025641025644),\n",
       " ('09fr0', 0.25641025641025644),\n",
       " ('0ar0', 0.25641025641025644),\n",
       " ('0it0', 0.25641025641025644),\n",
       " ('11ar0', 0.25641025641025644),\n",
       " ('12Table', 0.25641025641025644),\n",
       " ('13ja0', 0.25641025641025644),\n",
       " ('17', 0.25641025641025644),\n",
       " ('17Average', 0.25641025641025644),\n",
       " ('19es0', 0.25641025641025644),\n",
       " ('20', 0.25641025641025644),\n",
       " ('21', 0.25641025641025644),\n",
       " ('246', 0.25641025641025644),\n",
       " ('2Table', 0.25641025641025644),\n",
       " ('2zh0', 0.25641025641025644),\n",
       " ('320', 0.25641025641025644),\n",
       " ('33', 0.25641025641025644),\n",
       " ('390', 0.25641025641025644),\n",
       " ('3de1', 0.25641025641025644),\n",
       " ('41', 0.25641025641025644),\n",
       " ('47', 0.25641025641025644),\n",
       " ('48', 0.25641025641025644),\n",
       " ('550', 0.25641025641025644),\n",
       " ('61', 0.25641025641025644),\n",
       " ('630', 0.25641025641025644),\n",
       " ('690', 0.25641025641025644),\n",
       " ('6es0', 0.25641025641025644),\n",
       " ('74', 0.25641025641025644),\n",
       " ('750', 0.25641025641025644),\n",
       " ('79', 0.25641025641025644),\n",
       " ('7fr0', 0.25641025641025644),\n",
       " ('840', 0.25641025641025644),\n",
       " ('880', 0.25641025641025644),\n",
       " ('89', 0.25641025641025644),\n",
       " ('92', 0.25641025641025644),\n",
       " ('93', 0.25641025641025644),\n",
       " ('940', 0.25641025641025644),\n",
       " ('950', 0.25641025641025644),\n",
       " ('970', 0.25641025641025644),\n",
       " ('980', 0.25641025641025644),\n",
       " ('9ja0', 0.25641025641025644),\n",
       " ('Among', 0.25641025641025644),\n",
       " ('Based', 0.25641025641025644),\n",
       " ('Boolean', 0.25641025641025644),\n",
       " ('Despite', 0.25641025641025644),\n",
       " ('Details', 0.25641025641025644),\n",
       " ('DictionariesQueries', 0.25641025641025644),\n",
       " ('Each', 0.25641025641025644),\n",
       " ('Encouragingly', 0.25641025641025644),\n",
       " ('Examples', 0.25641025641025644),\n",
       " ('Factors', 0.25641025641025644),\n",
       " ('February', 0.25641025641025644),\n",
       " ('First', 0.25641025641025644),\n",
       " ('French', 0.25641025641025644),\n",
       " ('Given', 0.25641025641025644),\n",
       " ('Having', 0.25641025641025644),\n",
       " ('Indri', 0.25641025641025644),\n",
       " ('January', 0.25641025641025644),\n",
       " ('Language', 0.25641025641025644),\n",
       " ('Lexical', 0.25641025641025644),\n",
       " ('Looking', 0.25641025641025644),\n",
       " ('MeCab', 0.25641025641025644),\n",
       " ('Morphological', 0.25641025641025644),\n",
       " ('Most', 0.25641025641025644),\n",
       " ('Nl', 0.25641025641025644),\n",
       " ('Note', 0.25641025641025644),\n",
       " ('Our', 0.25641025641025644),\n",
       " ('Pwi', 0.25641025641025644),\n",
       " ('P𝑝𝑎𝑝𝑒𝑟', 0.25641025641025644),\n",
       " ('P𝑝𝑎𝑝𝑦𝑟𝑢𝑠', 0.25641025641025644),\n",
       " ('Recall', 0.25641025641025644),\n",
       " ('See', 0.25641025641025644),\n",
       " ('Segmenter', 0.25641025641025644),\n",
       " ('Stanford', 0.25641025641025644),\n",
       " ('Swaheli', 0.25641025641025644),\n",
       " ('Synthetic', 0.25641025641025644),\n",
       " ('That', 0.25641025641025644),\n",
       " ('WikipediaBased', 0.25641025641025644),\n",
       " ('Word', 0.25641025641025644),\n",
       " ('able', 0.25641025641025644),\n",
       " ('absence', 0.25641025641025644),\n",
       " ('accordance', 0.25641025641025644),\n",
       " ('achieved', 0.25641025641025644),\n",
       " ('achieving', 0.25641025641025644),\n",
       " ('actual', 0.25641025641025644),\n",
       " ('adding', 0.25641025641025644),\n",
       " ('affected', 0.25641025641025644),\n",
       " ('almost', 0.25641025641025644),\n",
       " ('anchor', 0.25641025641025644),\n",
       " ('anti', 0.25641025641025644),\n",
       " ('apply', 0.25641025641025644),\n",
       " ('arbitrarily', 0.25641025641025644),\n",
       " ('around', 0.25641025641025644),\n",
       " ('basic', 0.25641025641025644),\n",
       " ('because', 0.25641025641025644),\n",
       " ('below', 0.25641025641025644),\n",
       " ('between', 0.25641025641025644),\n",
       " ('billion', 0.25641025641025644),\n",
       " ('calculate', 0.25641025641025644),\n",
       " ('calculated', 0.25641025641025644),\n",
       " ('calculation', 0.25641025641025644),\n",
       " ('carried', 0.25641025641025644),\n",
       " ('certainly', 0.25641025641025644),\n",
       " ('class', 0.25641025641025644),\n",
       " ('collected', 0.25641025641025644),\n",
       " ('collocate', 0.25641025641025644),\n",
       " ('combined', 0.25641025641025644),\n",
       " ('combining', 0.25641025641025644),\n",
       " ('community', 0.25641025641025644),\n",
       " ('comparably', 0.25641025641025644),\n",
       " ('compiled', 0.25641025641025644),\n",
       " ('complement', 0.25641025641025644),\n",
       " ('considerably', 0.25641025641025644),\n",
       " ('consists', 0.25641025641025644),\n",
       " ('constructed', 0.25641025641025644),\n",
       " ('cooccur', 0.25641025641025644),\n",
       " ('count', 0.25641025641025644),\n",
       " ('creates', 0.25641025641025644),\n",
       " ('cruiser', 0.25641025641025644),\n",
       " ('cscore', 0.25641025641025644),\n",
       " ('cutoff', 0.25641025641025644),\n",
       " ('decreasing', 0.25641025641025644),\n",
       " ('describe', 0.25641025641025644),\n",
       " ('description', 0.25641025641025644),\n",
       " ('detailed', 0.25641025641025644),\n",
       " ('details', 0.25641025641025644),\n",
       " ('developed', 0.25641025641025644),\n",
       " ('developer', 0.25641025641025644),\n",
       " ('differ', 0.25641025641025644),\n",
       " ('discovery', 0.25641025641025644),\n",
       " ('discriminatory', 0.25641025641025644),\n",
       " ('do', 0.25641025641025644),\n",
       " ('documentsNote', 0.25641025641025644),\n",
       " ('downsampled', 0.25641025641025644),\n",
       " ('due', 0.25641025641025644),\n",
       " ('dumps', 0.25641025641025644),\n",
       " ('easily', 0.25641025641025644),\n",
       " ('effective', 0.25641025641025644),\n",
       " ('elements', 0.25641025641025644),\n",
       " ('end', 0.25641025641025644),\n",
       " ('especially', 0.25641025641025644),\n",
       " ('estimated', 0.25641025641025644),\n",
       " ('evaluated', 0.25641025641025644),\n",
       " ('evaluates', 0.25641025641025644),\n",
       " ('evaluating', 0.25641025641025644),\n",
       " ('filtering', 0.25641025641025644),\n",
       " ('final', 0.25641025641025644),\n",
       " ('follow', 0.25641025641025644),\n",
       " ('formed', 0.25641025641025644),\n",
       " ('forms', 0.25641025641025644),\n",
       " ('frequent', 0.25641025641025644),\n",
       " ('generate', 0.25641025641025644),\n",
       " ('generation', 0.25641025641025644),\n",
       " ('genuine', 0.25641025641025644),\n",
       " ('gospel', 0.25641025641025644),\n",
       " ('guaranteed', 0.25641025641025644),\n",
       " ('had', 0.25641025641025644),\n",
       " ('hand', 0.25641025641025644),\n",
       " ('highest', 0.25641025641025644),\n",
       " ('iS', 0.25641025641025644),\n",
       " ('iff', 0.25641025641025644),\n",
       " ('impact', 0.25641025641025644),\n",
       " ('include', 0.25641025641025644),\n",
       " ('incorporating', 0.25641025641025644),\n",
       " ('increasing', 0.25641025641025644),\n",
       " ('indeed', 0.25641025641025644),\n",
       " ('indeterminate', 0.25641025641025644),\n",
       " ('individual', 0.25641025641025644),\n",
       " ('inflected', 0.25641025641025644),\n",
       " ('information', 0.25641025641025644),\n",
       " ('instance', 0.25641025641025644),\n",
       " ('inverted', 0.25641025641025644),\n",
       " ('involving', 0.25641025641025644),\n",
       " ('issue', 0.25641025641025644),\n",
       " ('itemset', 0.25641025641025644),\n",
       " ('ja', 0.25641025641025644),\n",
       " ('knowledge', 0.25641025641025644),\n",
       " ('ko', 0.25641025641025644),\n",
       " ('lNl', 0.25641025641025644),\n",
       " ('learnedAvg', 0.25641025641025644),\n",
       " ('lemmas', 0.25641025641025644),\n",
       " ('length', 0.25641025641025644),\n",
       " ('lengthen3', 0.25641025641025644),\n",
       " ('lexemes', 0.25641025641025644),\n",
       " ('lexicographer', 0.25641025641025644),\n",
       " ('lexicons', 0.25641025641025644),\n",
       " ('line', 0.25641025641025644),\n",
       " ('link', 0.25641025641025644),\n",
       " ('list', 0.25641025641025644),\n",
       " ('little', 0.25641025641025644),\n",
       " ('local', 0.25641025641025644),\n",
       " ('log', 0.25641025641025644),\n",
       " ('longest', 0.25641025641025644),\n",
       " ('manual', 0.25641025641025644),\n",
       " ('mature', 0.25641025641025644),\n",
       " ('maxlPl', 0.25641025641025644),\n",
       " ('million', 0.25641025641025644),\n",
       " ('minus', 0.25641025641025644),\n",
       " ('mono', 0.25641025641025644),\n",
       " ('most', 0.25641025641025644),\n",
       " ('naively', 0.25641025641025644),\n",
       " ('name', 0.25641025641025644),\n",
       " ('needs', 0.25641025641025644),\n",
       " ('new', 0.25641025641025644),\n",
       " ('noodle', 0.25641025641025644),\n",
       " ('noting', 0.25641025641025644),\n",
       " ('nouns', 0.25641025641025644),\n",
       " ('obtained', 0.25641025641025644),\n",
       " ('occurs', 0.25641025641025644),\n",
       " ('optimisation', 0.25641025641025644),\n",
       " ('order', 0.25641025641025644),\n",
       " ('org', 0.25641025641025644),\n",
       " ('otherwise', 0.25641025641025644),\n",
       " ('out', 0.25641025641025644),\n",
       " ('outside', 0.25641025641025644),\n",
       " ('pages', 0.25641025641025644),\n",
       " ('pair', 0.25641025641025644),\n",
       " ('paired', 0.25641025641025644),\n",
       " ('pairing', 0.25641025641025644),\n",
       " ('panlex', 0.25641025641025644),\n",
       " ('paper', 0.25641025641025644),\n",
       " ('perform', 0.25641025641025644),\n",
       " ('predict', 0.25641025641025644),\n",
       " ('predominantly', 0.25641025641025644),\n",
       " ('preferred', 0.25641025641025644),\n",
       " ('presented', 0.25641025641025644),\n",
       " ('prevalence', 0.25641025641025644),\n",
       " ('previous', 0.25641025641025644),\n",
       " ('prior', 0.25641025641025644),\n",
       " ('priori', 0.25641025641025644),\n",
       " ('proportion', 0.25641025641025644),\n",
       " ('pruned', 0.25641025641025644),\n",
       " ('pt', 0.25641025641025644),\n",
       " ('question', 0.25641025641025644),\n",
       " ('randomly', 0.25641025641025644),\n",
       " ('range', 0.25641025641025644),\n",
       " ('ranking', 0.25641025641025644),\n",
       " ('regular', 0.25641025641025644),\n",
       " ('reject', 0.25641025641025644),\n",
       " ('relative', 0.25641025641025644),\n",
       " ('removed', 0.25641025641025644),\n",
       " ('respectively', 0.25641025641025644),\n",
       " ('restricting', 0.25641025641025644),\n",
       " ('resultant', 0.25641025641025644),\n",
       " ('retrieve', 0.25641025641025644),\n",
       " ('retrieved', 0.25641025641025644),\n",
       " ('returns', 0.25641025641025644),\n",
       " ('said', 0.25641025641025644),\n",
       " ('satisfying', 0.25641025641025644),\n",
       " ('scope', 0.25641025641025644),\n",
       " ('sdictwi', 0.25641025641025644),\n",
       " ('segmentation', 0.25641025641025644),\n",
       " ('select', 0.25641025641025644),\n",
       " ('selected', 0.25641025641025644),\n",
       " ('selective', 0.25641025641025644),\n",
       " ('simple', 0.25641025641025644),\n",
       " ('small', 0.25641025641025644),\n",
       " ('smallerLangDictsMAPBaselineen0', 0.25641025641025644),\n",
       " ('smallerLangDictsMAPBaselinezh0', 0.25641025641025644),\n",
       " ('smallerLangWikipedia', 0.25641025641025644),\n",
       " ('smallerLanguageProportionen', 0.25641025641025644),\n",
       " ('so', 0.25641025641025644),\n",
       " ('sophistication', 0.25641025641025644),\n",
       " ('sourced', 0.25641025641025644),\n",
       " ('specialised', 0.25641025641025644),\n",
       " ('subset', 0.25641025641025644),\n",
       " ('successful', 0.25641025641025644),\n",
       " ('suggest', 0.25641025641025644),\n",
       " ('sushi', 0.25641025641025644),\n",
       " ('suspect', 0.25641025641025644),\n",
       " ('system', 0.25641025641025644),\n",
       " ('systematicity', 0.25641025641025644),\n",
       " ('ten', 0.25641025641025644),\n",
       " ('terminology', 0.25641025641025644),\n",
       " ('text', 0.25641025641025644),\n",
       " ('them', 0.25641025641025644),\n",
       " ('three', 0.25641025641025644),\n",
       " ('threshold', 0.25641025641025644),\n",
       " ('thus', 0.25641025641025644),\n",
       " ('together', 0.25641025641025644),\n",
       " ('transliterated', 0.25641025641025644),\n",
       " ('typicality', 0.25641025641025644),\n",
       " ('underlying', 0.25641025641025644),\n",
       " ('until', 0.25641025641025644),\n",
       " ('usable', 0.25641025641025644),\n",
       " ('value', 0.25641025641025644),\n",
       " ('values', 0.25641025641025644),\n",
       " ('varies', 0.25641025641025644),\n",
       " ('verbs', 0.25641025641025644),\n",
       " ('vocabulary', 0.25641025641025644),\n",
       " ('want', 0.25641025641025644),\n",
       " ('way', 0.25641025641025644),\n",
       " ('ways', 0.25641025641025644),\n",
       " ('weighing', 0.25641025641025644),\n",
       " ('whereas', 0.25641025641025644),\n",
       " ('whereby', 0.25641025641025644),\n",
       " ('while', 0.25641025641025644),\n",
       " ('wide', 0.25641025641025644),\n",
       " ('wiotherwisewhere', 0.25641025641025644),\n",
       " ('within', 0.25641025641025644),\n",
       " ('wn', 0.25641025641025644),\n",
       " ('zero', 0.25641025641025644),\n",
       " ('zh', 0.25641025641025644),\n",
       " ('documents', 0.2483642793987621),\n",
       " ('given', 0.20197238658777122),\n",
       " ('an', 0.16153846153846158),\n",
       " ('contain', 0.16153846153846158),\n",
       " ('that', 0.15957767722473595),\n",
       " ('construction', 0.10384615384615382),\n",
       " ('of', 0.0622354991287028),\n",
       " ('this', 0.057001972386587774),\n",
       " ('their', 0.05192307692307691),\n",
       " ('where', 0.05192307692307691),\n",
       " ('containing', 0.05170940170940177),\n",
       " ('training', 0.05170940170940177),\n",
       " ('using', 0.05170940170940177),\n",
       " ('In', 0.0005128205128205117),\n",
       " ('no', 0.0005128205128205117),\n",
       " ('open', 0.0005128205128205117),\n",
       " ('other', 0.0005128205128205117),\n",
       " ('potentially', 0.0005128205128205117),\n",
       " ('search', 0.0005128205128205117),\n",
       " ('and', 0.00017383746197304872)]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sorted(InfoF().chi2_f(_input), key=lambda x: x[1], reverse=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('work', 0.14538956026906502),\n",
       " ('genre', 0.14538956026906494),\n",
       " ('methods', 0.14538956026906494),\n",
       " ('the', 0.1126214712083772),\n",
       " ('for', 0.10906528613184305),\n",
       " ('to', 0.10889791737812755),\n",
       " ('automatically', 0.105657356783042),\n",
       " ('parallel', 0.105657356783042),\n",
       " ('been', 0.10565735678304196),\n",
       " ('classification', 0.10565735678304196),\n",
       " ('content', 0.10565735678304196),\n",
       " ('on', 0.10419015154123319),\n",
       " ('there', 0.09728396639388939),\n",
       " ('our', 0.08565968366868762),\n",
       " ('as', 0.07344940760146343),\n",
       " ('research', 0.07343406279666506),\n",
       " ('While', 0.06840691286828929),\n",
       " ('although', 0.06840691286828929),\n",
       " ('corpora', 0.06840691286828929),\n",
       " ('crosslingual', 0.06840691286828929),\n",
       " ('detection', 0.06840691286828929),\n",
       " ('features', 0.06840691286828929),\n",
       " ('generally', 0.06840691286828929),\n",
       " ('mixture', 0.06840691286828929),\n",
       " ('structural', 0.06840691286828929),\n",
       " ('technical', 0.06840691286828929),\n",
       " ('thesauri', 0.06840691286828929),\n",
       " ('type', 0.06840691286828929),\n",
       " ('identify', 0.06316187700559717),\n",
       " ('relevant', 0.06316187700559717),\n",
       " ('and', 0.06265098261378572),\n",
       " ('not', 0.06258434068951566),\n",
       " ('construction', 0.06065482112127292),\n",
       " ('of', 0.05941627382187788),\n",
       " ('is', 0.05887161718437542),\n",
       " ('document', 0.058108482172317566),\n",
       " ('dictionaries', 0.05615065915278185),\n",
       " ('We', 0.05579823544033276),\n",
       " ('it', 0.05579823544033276),\n",
       " ('over', 0.05579823544033276),\n",
       " ('results', 0.0538945258625),\n",
       " ('each', 0.05291765875338847),\n",
       " ('query', 0.05291765875338847),\n",
       " ('such', 0.051067325187264964),\n",
       " ('terms', 0.051067325187264964),\n",
       " ('or', 0.0474658978849327),\n",
       " ('queries', 0.04695987129942239),\n",
       " ('contain', 0.04645572284093018),\n",
       " ('term', 0.04645572284093018),\n",
       " ('have', 0.04450094421562542),\n",
       " ('proposed', 0.04396831187443076),\n",
       " ('from', 0.04388512371832133),\n",
       " ('we', 0.043513564272425045),\n",
       " ('language', 0.0422192421009579),\n",
       " ('words', 0.04117222474545709),\n",
       " ('be', 0.03962804194685303),\n",
       " ('web', 0.03824048979039209),\n",
       " ('bilingual', 0.03811686742423033),\n",
       " ('analysis', 0.03756729517714112),\n",
       " ('can', 0.03756729517714112),\n",
       " ('comparable', 0.03756729517714112),\n",
       " ('construct', 0.03756729517714112),\n",
       " ('domain', 0.03756729517714112),\n",
       " ('its', 0.03756729517714112),\n",
       " ('precision', 0.03756729517714112),\n",
       " ('more', 0.03723632724796322),\n",
       " ('in', 0.03709006050221847),\n",
       " ('given', 0.03708031470151073),\n",
       " ('that', 0.03577102161505881),\n",
       " ('collection', 0.03554521081743381),\n",
       " ('set', 0.03554521081743381),\n",
       " ('were', 0.03554521081743378),\n",
       " ('based', 0.03505588915494404),\n",
       " ('but', 0.03328977832821553),\n",
       " ('low', 0.03328977832821553),\n",
       " ('used', 0.03328977832821553),\n",
       " ('Acknowledgements', 0.03327992550872365),\n",
       " ('Also', 0.03327992550872365),\n",
       " ('Alternatively', 0.03327992550872365),\n",
       " ('Australian', 0.03327992550872365),\n",
       " ('Council', 0.03327992550872365),\n",
       " ('Eight', 0.03327992550872365),\n",
       " ('Finally', 0.03327992550872365),\n",
       " ('Group', 0.03327992550872365),\n",
       " ('Here', 0.03327992550872365),\n",
       " ('Methods', 0.03327992550872365),\n",
       " ('Panlex', 0.03327992550872365),\n",
       " ('Parallel', 0.03327992550872365),\n",
       " ('Related', 0.03327992550872365),\n",
       " ('Research', 0.03327992550872365),\n",
       " ('Such', 0.03327992550872365),\n",
       " ('adapted', 0.03327992550872365),\n",
       " ('anonymous', 0.03327992550872365),\n",
       " ('applied', 0.03327992550872365),\n",
       " ('assistance', 0.03327992550872365),\n",
       " ('attained', 0.03327992550872365),\n",
       " ('automatic', 0.03327992550872365),\n",
       " ('aware', 0.03327992550872365),\n",
       " ('bias', 0.03327992550872365),\n",
       " ('broadly', 0.03327992550872365),\n",
       " ('categories', 0.03327992550872365),\n",
       " ('categorisation', 0.03327992550872365),\n",
       " ('classifies', 0.03327992550872365),\n",
       " ('collections', 0.03327992550872365),\n",
       " ('comments', 0.03327992550872365),\n",
       " ('commonly', 0.03327992550872365),\n",
       " ('course', 0.03327992550872365),\n",
       " ('credible', 0.03327992550872365),\n",
       " ('density', 0.03327992550872365),\n",
       " ('described', 0.03327992550872365),\n",
       " ('design', 0.03327992550872365),\n",
       " ('designed', 0.03327992550872365),\n",
       " ('detect', 0.03327992550872365),\n",
       " ('detecting', 0.03327992550872365),\n",
       " ('develop', 0.03327992550872365),\n",
       " ('distributional', 0.03327992550872365),\n",
       " ('downside', 0.03327992550872365),\n",
       " ('exist', 0.03327992550872365),\n",
       " ('experiment', 0.03327992550872365),\n",
       " ('experimental', 0.03327992550872365),\n",
       " ('explore', 0.03327992550872365),\n",
       " ('extract', 0.03327992550872365),\n",
       " ('falls', 0.03327992550872365),\n",
       " ('filter', 0.03327992550872365),\n",
       " ('form', 0.03327992550872365),\n",
       " ('four', 0.03327992550872365),\n",
       " ('franca', 0.03327992550872365),\n",
       " ('funding', 0.03327992550872365),\n",
       " ('future', 0.03327992550872365),\n",
       " ('general', 0.03327992550872365),\n",
       " ('glossed', 0.03327992550872365),\n",
       " ('glossing', 0.03327992550872365),\n",
       " ('guarantee', 0.03327992550872365),\n",
       " ('has', 0.03327992550872365),\n",
       " ('high', 0.03327992550872365),\n",
       " ('hope', 0.03327992550872365),\n",
       " ('identification', 0.03327992550872365),\n",
       " ('identifying', 0.03327992550872365),\n",
       " ('immediately', 0.03327992550872365),\n",
       " ('initial', 0.03327992550872365),\n",
       " ('judged', 0.03327992550872365),\n",
       " ('label', 0.03327992550872365),\n",
       " ('lines', 0.03327992550872365),\n",
       " ('lingua', 0.03327992550872365),\n",
       " ('lists', 0.03327992550872365),\n",
       " ('locate', 0.03327992550872365),\n",
       " ('mix', 0.03327992550872365),\n",
       " ('multi', 0.03327992550872365),\n",
       " ('namely', 0.03327992550872365),\n",
       " ('novel', 0.03327992550872365),\n",
       " ('obvious', 0.03327992550872365),\n",
       " ('particular', 0.03327992550872365),\n",
       " ('possible', 0.03327992550872365),\n",
       " ('predictable', 0.03327992550872365),\n",
       " ('proceeded', 0.03327992550872365),\n",
       " ('realistically', 0.03327992550872365),\n",
       " ('recall', 0.03327992550872365),\n",
       " ('reviewers', 0.03327992550872365),\n",
       " ('same', 0.03327992550872365),\n",
       " ('seeks', 0.03327992550872365),\n",
       " ('similarity', 0.03327992550872365),\n",
       " ('site', 0.03327992550872365),\n",
       " ('source', 0.03327992550872365),\n",
       " ('specific', 0.03327992550872365),\n",
       " ('specifically', 0.03327992550872365),\n",
       " ('strong', 0.03327992550872365),\n",
       " ('suited', 0.03327992550872365),\n",
       " ('supported', 0.03327992550872365),\n",
       " ('task', 0.03327992550872365),\n",
       " ('thank', 0.03327992550872365),\n",
       " ('theoretically', 0.03327992550872365),\n",
       " ('towards', 0.03327992550872365),\n",
       " ('valuable', 0.03327992550872365),\n",
       " ('wish', 0.03327992550872365),\n",
       " ('would', 0.03327992550872365),\n",
       " ('multilingual', 0.0331904227734595),\n",
       " ('documents', 0.032066281421792894),\n",
       " ('The', 0.03148239778758944),\n",
       " ('wi', 0.030070101263982546),\n",
       " ('returned', 0.030070101263982518),\n",
       " ('dictionary', 0.024755698986351632),\n",
       " ('For', 0.02473886434511202),\n",
       " ('MAP', 0.02473886434511202),\n",
       " ('Table', 0.02473886434511202),\n",
       " ('found', 0.02473886434511202),\n",
       " ('non', 0.02473886434511202),\n",
       " ('number', 0.02473886434511202),\n",
       " ('only', 0.02473886434511202),\n",
       " ('result', 0.02473886434511202),\n",
       " ('retrieval', 0.02473886434511202),\n",
       " ('target', 0.02473886434511202),\n",
       " ('than', 0.02473886434511202),\n",
       " ('with', 0.021423103510773922),\n",
       " ('This', 0.019623139138670476),\n",
       " ('could', 0.019623139138670476),\n",
       " ('pre', 0.019623139138670476),\n",
       " ('ClueWeb09', 0.01954409226182821),\n",
       " ('en', 0.01954409226182821),\n",
       " ('first', 0.01954409226182821),\n",
       " ('lexical', 0.01954409226182821),\n",
       " ('As', 0.019544092261828183),\n",
       " ('Google', 0.019544092261828183),\n",
       " ('combinations', 0.019544092261828183),\n",
       " ('if', 0.019544092261828183),\n",
       " ('likelihood', 0.019544092261828183),\n",
       " ('occurrence', 0.019544092261828183),\n",
       " ('scores', 0.019544092261828183),\n",
       " ('dataset', 0.018698512666893652),\n",
       " ('this', 0.018622699522342634),\n",
       " ('method', 0.017679055737338203),\n",
       " ('languages', 0.01643703786466784),\n",
       " ('all', 0.015724299063728908),\n",
       " ('Apriori', 0.014478937842572925),\n",
       " ('Dl', 0.014478937842572925),\n",
       " ('Japanese', 0.014478937842572925),\n",
       " ('Ll', 0.014478937842572925),\n",
       " ('against', 0.014478937842572925),\n",
       " ('algorithm', 0.014478937842572925),\n",
       " ('approach', 0.014478937842572925),\n",
       " ('at', 0.014478937842572925),\n",
       " ('baseline', 0.014478937842572925),\n",
       " ('co', 0.014478937842572925),\n",
       " ('criterion', 0.014478937842572925),\n",
       " ('de', 0.014478937842572925),\n",
       " ('experiments', 0.014478937842572925),\n",
       " ('formulation', 0.014478937842572925),\n",
       " ('fr', 0.014478937842572925),\n",
       " ('higher', 0.014478937842572925),\n",
       " ('including', 0.014478937842572925),\n",
       " ('intuition', 0.014478937842572925),\n",
       " ('least', 0.014478937842572925),\n",
       " ('occur', 0.014478937842572925),\n",
       " ('one', 0.014478937842572925),\n",
       " ('querying', 0.014478937842572925),\n",
       " ('random', 0.014478937842572925),\n",
       " ('second', 0.014478937842572925),\n",
       " ('size', 0.014478937842572925),\n",
       " ('standard', 0.014478937842572925),\n",
       " ('support', 0.014478937842572925),\n",
       " ('those', 0.014478937842572925),\n",
       " ('true', 0.014478937842572925),\n",
       " ('whether', 0.014478937842572925),\n",
       " ('their', 0.013665813115674372),\n",
       " ('synthetic', 0.011979368625942469),\n",
       " ('being', 0.011576558547610083),\n",
       " ('these', 0.011576558547610083),\n",
       " ('by', 0.010546007571312063),\n",
       " ('which', 0.009948768134884114),\n",
       " ('1000', 0.00953705909853153),\n",
       " ('2009', 0.00953705909853153),\n",
       " ('32', 0.00953705909853153),\n",
       " ('49', 0.00953705909853153),\n",
       " ('50', 0.00953705909853153),\n",
       " ('52', 0.00953705909853153),\n",
       " ('770', 0.00953705909853153),\n",
       " ('920', 0.00953705909853153),\n",
       " ('API', 0.00953705909853153),\n",
       " ('Arabic', 0.00953705909853153),\n",
       " ('Below', 0.00953705909853153),\n",
       " ('Chinese', 0.00953705909853153),\n",
       " ('Dictionary', 0.00953705909853153),\n",
       " ('Dicts', 0.00953705909853153),\n",
       " ('Freedict', 0.00953705909853153),\n",
       " ('German', 0.00953705909853153),\n",
       " ('Italian', 0.00953705909853153),\n",
       " ('Score', 0.00953705909853153),\n",
       " ('Spanish', 0.00953705909853153),\n",
       " ('To', 0.00953705909853153),\n",
       " ('access', 0.00953705909853153),\n",
       " ('any', 0.00953705909853153),\n",
       " ('ar', 0.00953705909853153),\n",
       " ('assume', 0.00953705909853153),\n",
       " ('average', 0.00953705909853153),\n",
       " ('background', 0.00953705909853153),\n",
       " ('best', 0.00953705909853153),\n",
       " ('certain', 0.00953705909853153),\n",
       " ('component', 0.00953705909853153),\n",
       " ('contained', 0.00953705909853153),\n",
       " ('contains', 0.00953705909853153),\n",
       " ('coverage', 0.00953705909853153),\n",
       " ('data', 0.00953705909853153),\n",
       " ('detail', 0.00953705909853153),\n",
       " ('determination', 0.00953705909853153),\n",
       " ('discriminating', 0.00953705909853153),\n",
       " ('effectiveness', 0.00953705909853153),\n",
       " ('empty', 0.00953705909853153),\n",
       " ('es', 0.00953705909853153),\n",
       " ('evaluate', 0.00953705909853153),\n",
       " ('evaluation', 0.00953705909853153),\n",
       " ('follows', 0.00953705909853153),\n",
       " ('function', 0.00953705909853153),\n",
       " ('greater', 0.00953705909853153),\n",
       " ('included', 0.00953705909853153),\n",
       " ('index', 0.00953705909853153),\n",
       " ('indexed', 0.00953705909853153),\n",
       " ('indicated', 0.00953705909853153),\n",
       " ('injected', 0.00953705909853153),\n",
       " ('interest', 0.00953705909853153),\n",
       " ('item', 0.00953705909853153),\n",
       " ('largely', 0.00953705909853153),\n",
       " ('lexicon', 0.00953705909853153),\n",
       " ('likely', 0.00953705909853153),\n",
       " ('lower', 0.00953705909853153),\n",
       " ('methodology', 0.00953705909853153),\n",
       " ('modified', 0.00953705909853153),\n",
       " ('monolingual', 0.00953705909853153),\n",
       " ('next', 0.00953705909853153),\n",
       " ('original', 0.00953705909853153),\n",
       " ('others', 0.00953705909853153),\n",
       " ('prefer', 0.00953705909853153),\n",
       " ('ranked', 0.00953705909853153),\n",
       " ('resources', 0.00953705909853153),\n",
       " ('return', 0.00953705909853153),\n",
       " ('score', 0.00953705909853153),\n",
       " ('sdict', 0.00953705909853153),\n",
       " ('selection', 0.00953705909853153),\n",
       " ('simply', 0.00953705909853153),\n",
       " ('some', 0.00953705909853153),\n",
       " ('step', 0.00953705909853153),\n",
       " ('substantially', 0.00953705909853153),\n",
       " ('targeted', 0.00953705909853153),\n",
       " ('then', 0.00953705909853153),\n",
       " ('they', 0.00953705909853153),\n",
       " ('third', 0.00953705909853153),\n",
       " ('total', 0.00953705909853153),\n",
       " ('two', 0.00953705909853153),\n",
       " ('underestimate', 0.00953705909853153),\n",
       " ('use', 0.00953705909853153),\n",
       " ('via', 0.00953705909853153),\n",
       " ('also', 0.009275573521633149),\n",
       " ('corpus', 0.009275573521633149),\n",
       " ('developers', 0.009275573521633149),\n",
       " ('direct', 0.009275573521633149),\n",
       " ('engine', 0.009275573521633149),\n",
       " ('exhaustively', 0.009275573521633149),\n",
       " ('existing', 0.009275573521633149),\n",
       " ('far', 0.009275573521633149),\n",
       " ('mining', 0.009275573521633149),\n",
       " ('need', 0.009275573521633149),\n",
       " ('patterns', 0.009275573521633149),\n",
       " ('rather', 0.009275573521633149),\n",
       " ('relatively', 0.009275573521633149),\n",
       " ('sets', 0.009275573521633149),\n",
       " ('should', 0.009275573521633149),\n",
       " ('single', 0.009275573521633149),\n",
       " ('style', 0.009275573521633149),\n",
       " ('train', 0.009275573521633149),\n",
       " ('variety', 0.009275573521633149),\n",
       " ('very', 0.009275573521633149),\n",
       " ('well', 0.009275573521633149),\n",
       " ('what', 0.009275573521633149),\n",
       " ('are', 0.008429482357715082),\n",
       " ('other', 0.007485467639206947),\n",
       " ('English', 0.007055930635627913),\n",
       " ('word', 0.007055930635627913),\n",
       " ('Wikipedia', 0.005484060851929443),\n",
       " ('00', 0.0047125704977222516),\n",
       " ('000', 0.0047125704977222516),\n",
       " ('00Average', 0.0047125704977222516),\n",
       " ('00de0', 0.0047125704977222516),\n",
       " ('00es0', 0.0047125704977222516),\n",
       " ('00zh0', 0.0047125704977222516),\n",
       " ('01ar0', 0.0047125704977222516),\n",
       " ('02', 0.0047125704977222516),\n",
       " ('03it0', 0.0047125704977222516),\n",
       " ('04Table', 0.0047125704977222516),\n",
       " ('04de0', 0.0047125704977222516),\n",
       " ('04ja0', 0.0047125704977222516),\n",
       " ('05', 0.0047125704977222516),\n",
       " ('08fr0', 0.0047125704977222516),\n",
       " ('08it0', 0.0047125704977222516),\n",
       " ('09fr0', 0.0047125704977222516),\n",
       " ('0ar0', 0.0047125704977222516),\n",
       " ('0it0', 0.0047125704977222516),\n",
       " ('10', 0.0047125704977222516),\n",
       " ('100', 0.0047125704977222516),\n",
       " ('11ar0', 0.0047125704977222516),\n",
       " ('12Table', 0.0047125704977222516),\n",
       " ('13', 0.0047125704977222516),\n",
       " ('13ja0', 0.0047125704977222516),\n",
       " ('17', 0.0047125704977222516),\n",
       " ('17Average', 0.0047125704977222516),\n",
       " ('19es0', 0.0047125704977222516),\n",
       " ('20', 0.0047125704977222516),\n",
       " ('21', 0.0047125704977222516),\n",
       " ('246', 0.0047125704977222516),\n",
       " ('2Table', 0.0047125704977222516),\n",
       " ('2zh0', 0.0047125704977222516),\n",
       " ('320', 0.0047125704977222516),\n",
       " ('33', 0.0047125704977222516),\n",
       " ('390', 0.0047125704977222516),\n",
       " ('3de1', 0.0047125704977222516),\n",
       " ('41', 0.0047125704977222516),\n",
       " ('47', 0.0047125704977222516),\n",
       " ('48', 0.0047125704977222516),\n",
       " ('550', 0.0047125704977222516),\n",
       " ('61', 0.0047125704977222516),\n",
       " ('62', 0.0047125704977222516),\n",
       " ('63', 0.0047125704977222516),\n",
       " ('630', 0.0047125704977222516),\n",
       " ('690', 0.0047125704977222516),\n",
       " ('6es0', 0.0047125704977222516),\n",
       " ('74', 0.0047125704977222516),\n",
       " ('750', 0.0047125704977222516),\n",
       " ('79', 0.0047125704977222516),\n",
       " ('7fr0', 0.0047125704977222516),\n",
       " ('840', 0.0047125704977222516),\n",
       " ('880', 0.0047125704977222516),\n",
       " ('89', 0.0047125704977222516),\n",
       " ('92', 0.0047125704977222516),\n",
       " ('93', 0.0047125704977222516),\n",
       " ('940', 0.0047125704977222516),\n",
       " ('950', 0.0047125704977222516),\n",
       " ('970', 0.0047125704977222516),\n",
       " ('980', 0.0047125704977222516),\n",
       " ('9ja0', 0.0047125704977222516),\n",
       " ('Among', 0.0047125704977222516),\n",
       " ('Based', 0.0047125704977222516),\n",
       " ('Boolean', 0.0047125704977222516),\n",
       " ('Despite', 0.0047125704977222516),\n",
       " ('Details', 0.0047125704977222516),\n",
       " ('DictionariesQueries', 0.0047125704977222516),\n",
       " ('Each', 0.0047125704977222516),\n",
       " ('Encouragingly', 0.0047125704977222516),\n",
       " ('Examples', 0.0047125704977222516),\n",
       " ('Factors', 0.0047125704977222516),\n",
       " ('February', 0.0047125704977222516),\n",
       " ('Figure', 0.0047125704977222516),\n",
       " ('First', 0.0047125704977222516),\n",
       " ('French', 0.0047125704977222516),\n",
       " ('Given', 0.0047125704977222516),\n",
       " ('Having', 0.0047125704977222516),\n",
       " ('Indri', 0.0047125704977222516),\n",
       " ('January', 0.0047125704977222516),\n",
       " ('Korean', 0.0047125704977222516),\n",
       " ('Language', 0.0047125704977222516),\n",
       " ('Lexical', 0.0047125704977222516),\n",
       " ('Looking', 0.0047125704977222516),\n",
       " ('MeCab', 0.0047125704977222516),\n",
       " ('Morphological', 0.0047125704977222516),\n",
       " ('Most', 0.0047125704977222516),\n",
       " ('Nl', 0.0047125704977222516),\n",
       " ('Note', 0.0047125704977222516),\n",
       " ('Our', 0.0047125704977222516),\n",
       " ('Portuguese', 0.0047125704977222516),\n",
       " ('Pwi', 0.0047125704977222516),\n",
       " ('P𝑝𝑎𝑝𝑒𝑟', 0.0047125704977222516),\n",
       " ('P𝑝𝑎𝑝𝑦𝑟𝑢𝑠', 0.0047125704977222516),\n",
       " ('Recall', 0.0047125704977222516),\n",
       " ('See', 0.0047125704977222516),\n",
       " ('Segmenter', 0.0047125704977222516),\n",
       " ('Stanford', 0.0047125704977222516),\n",
       " ('Swaheli', 0.0047125704977222516),\n",
       " ('Synthetic', 0.0047125704977222516),\n",
       " ('That', 0.0047125704977222516),\n",
       " ('WikipediaBased', 0.0047125704977222516),\n",
       " ('Word', 0.0047125704977222516),\n",
       " ('able', 0.0047125704977222516),\n",
       " ('absence', 0.0047125704977222516),\n",
       " ('accordance', 0.0047125704977222516),\n",
       " ('achieved', 0.0047125704977222516),\n",
       " ('achieving', 0.0047125704977222516),\n",
       " ('actual', 0.0047125704977222516),\n",
       " ('adding', 0.0047125704977222516),\n",
       " ('affected', 0.0047125704977222516),\n",
       " ('almost', 0.0047125704977222516),\n",
       " ('anchor', 0.0047125704977222516),\n",
       " ('anti', 0.0047125704977222516),\n",
       " ('apply', 0.0047125704977222516),\n",
       " ('arbitrarily', 0.0047125704977222516),\n",
       " ('around', 0.0047125704977222516),\n",
       " ('basic', 0.0047125704977222516),\n",
       " ('because', 0.0047125704977222516),\n",
       " ('below', 0.0047125704977222516),\n",
       " ('between', 0.0047125704977222516),\n",
       " ('billion', 0.0047125704977222516),\n",
       " ('calculate', 0.0047125704977222516),\n",
       " ('calculated', 0.0047125704977222516),\n",
       " ('calculation', 0.0047125704977222516),\n",
       " ('carried', 0.0047125704977222516),\n",
       " ('certainly', 0.0047125704977222516),\n",
       " ('class', 0.0047125704977222516),\n",
       " ('cod', 0.0047125704977222516),\n",
       " ('collected', 0.0047125704977222516),\n",
       " ('collocate', 0.0047125704977222516),\n",
       " ('combined', 0.0047125704977222516),\n",
       " ('combining', 0.0047125704977222516),\n",
       " ('community', 0.0047125704977222516),\n",
       " ('comparably', 0.0047125704977222516),\n",
       " ('compiled', 0.0047125704977222516),\n",
       " ('complement', 0.0047125704977222516),\n",
       " ('considerably', 0.0047125704977222516),\n",
       " ('consists', 0.0047125704977222516),\n",
       " ('constructed', 0.0047125704977222516),\n",
       " ('cooccur', 0.0047125704977222516),\n",
       " ('count', 0.0047125704977222516),\n",
       " ('creates', 0.0047125704977222516),\n",
       " ('cruiser', 0.0047125704977222516),\n",
       " ('cscore', 0.0047125704977222516),\n",
       " ('cutoff', 0.0047125704977222516),\n",
       " ('decreasing', 0.0047125704977222516),\n",
       " ('describe', 0.0047125704977222516),\n",
       " ('description', 0.0047125704977222516),\n",
       " ('detailed', 0.0047125704977222516),\n",
       " ('details', 0.0047125704977222516),\n",
       " ('developed', 0.0047125704977222516),\n",
       " ('developer', 0.0047125704977222516),\n",
       " ('differ', 0.0047125704977222516),\n",
       " ('discovery', 0.0047125704977222516),\n",
       " ('discriminatory', 0.0047125704977222516),\n",
       " ('do', 0.0047125704977222516),\n",
       " ('documentsNote', 0.0047125704977222516),\n",
       " ('downsampled', 0.0047125704977222516),\n",
       " ('due', 0.0047125704977222516),\n",
       " ('dumps', 0.0047125704977222516),\n",
       " ('easily', 0.0047125704977222516),\n",
       " ('effective', 0.0047125704977222516),\n",
       " ('efficiently', 0.0047125704977222516),\n",
       " ('elements', 0.0047125704977222516),\n",
       " ('end', 0.0047125704977222516),\n",
       " ('especially', 0.0047125704977222516),\n",
       " ('estimated', 0.0047125704977222516),\n",
       " ('evaluated', 0.0047125704977222516),\n",
       " ('evaluates', 0.0047125704977222516),\n",
       " ('evaluating', 0.0047125704977222516),\n",
       " ('filtering', 0.0047125704977222516),\n",
       " ('final', 0.0047125704977222516),\n",
       " ('follow', 0.0047125704977222516),\n",
       " ('formed', 0.0047125704977222516),\n",
       " ('forms', 0.0047125704977222516),\n",
       " ('frequent', 0.0047125704977222516),\n",
       " ('generate', 0.0047125704977222516),\n",
       " ('generation', 0.0047125704977222516),\n",
       " ('genuine', 0.0047125704977222516),\n",
       " ('gospel', 0.0047125704977222516),\n",
       " ('guaranteed', 0.0047125704977222516),\n",
       " ('had', 0.0047125704977222516),\n",
       " ('hand', 0.0047125704977222516),\n",
       " ('highest', 0.0047125704977222516),\n",
       " ('iS', 0.0047125704977222516),\n",
       " ('iff', 0.0047125704977222516),\n",
       " ('impact', 0.0047125704977222516),\n",
       " ('include', 0.0047125704977222516),\n",
       " ('incorporating', 0.0047125704977222516),\n",
       " ('increasing', 0.0047125704977222516),\n",
       " ('indeed', 0.0047125704977222516),\n",
       " ('indeterminate', 0.0047125704977222516),\n",
       " ('individual', 0.0047125704977222516),\n",
       " ('inflected', 0.0047125704977222516),\n",
       " ('information', 0.0047125704977222516),\n",
       " ('instance', 0.0047125704977222516),\n",
       " ('inverted', 0.0047125704977222516),\n",
       " ('involving', 0.0047125704977222516),\n",
       " ('issue', 0.0047125704977222516),\n",
       " ('itemset', 0.0047125704977222516),\n",
       " ('iteration', 0.0047125704977222516),\n",
       " ('ja', 0.0047125704977222516),\n",
       " ('knowledge', 0.0047125704977222516),\n",
       " ('ko', 0.0047125704977222516),\n",
       " ('lNl', 0.0047125704977222516),\n",
       " ('learned', 0.0047125704977222516),\n",
       " ('learnedAvg', 0.0047125704977222516),\n",
       " ('lemmas', 0.0047125704977222516),\n",
       " ('length', 0.0047125704977222516),\n",
       " ('lengthen3', 0.0047125704977222516),\n",
       " ('lexemes', 0.0047125704977222516),\n",
       " ('lexicographer', 0.0047125704977222516),\n",
       " ('lexicons', 0.0047125704977222516),\n",
       " ('line', 0.0047125704977222516),\n",
       " ('link', 0.0047125704977222516),\n",
       " ('list', 0.0047125704977222516),\n",
       " ('little', 0.0047125704977222516),\n",
       " ('local', 0.0047125704977222516),\n",
       " ('log', 0.0047125704977222516),\n",
       " ('longest', 0.0047125704977222516),\n",
       " ('manual', 0.0047125704977222516),\n",
       " ('mature', 0.0047125704977222516),\n",
       " ('maxlPl', 0.0047125704977222516),\n",
       " ('may', 0.0047125704977222516),\n",
       " ('mean', 0.0047125704977222516),\n",
       " ('million', 0.0047125704977222516),\n",
       " ('minus', 0.0047125704977222516),\n",
       " ('mono', 0.0047125704977222516),\n",
       " ('most', 0.0047125704977222516),\n",
       " ('naively', 0.0047125704977222516),\n",
       " ('name', 0.0047125704977222516),\n",
       " ('needs', 0.0047125704977222516),\n",
       " ('new', 0.0047125704977222516),\n",
       " ('noodle', 0.0047125704977222516),\n",
       " ('noting', 0.0047125704977222516),\n",
       " ('nouns', 0.0047125704977222516),\n",
       " ('obtained', 0.0047125704977222516),\n",
       " ('occurs', 0.0047125704977222516),\n",
       " ('optimisation', 0.0047125704977222516),\n",
       " ('order', 0.0047125704977222516),\n",
       " ('org', 0.0047125704977222516),\n",
       " ('otherwise', 0.0047125704977222516),\n",
       " ('out', 0.0047125704977222516),\n",
       " ('outside', 0.0047125704977222516),\n",
       " ('pages', 0.0047125704977222516),\n",
       " ('pair', 0.0047125704977222516),\n",
       " ('paired', 0.0047125704977222516),\n",
       " ('pairing', 0.0047125704977222516),\n",
       " ('panlex', 0.0047125704977222516),\n",
       " ('paper', 0.0047125704977222516),\n",
       " ('perform', 0.0047125704977222516),\n",
       " ('performed', 0.0047125704977222516),\n",
       " ('predict', 0.0047125704977222516),\n",
       " ('predominantly', 0.0047125704977222516),\n",
       " ('preferred', 0.0047125704977222516),\n",
       " ('presented', 0.0047125704977222516),\n",
       " ('prevalence', 0.0047125704977222516),\n",
       " ('previous', 0.0047125704977222516),\n",
       " ('prior', 0.0047125704977222516),\n",
       " ('priori', 0.0047125704977222516),\n",
       " ('proportion', 0.0047125704977222516),\n",
       " ('proportions', 0.0047125704977222516),\n",
       " ('pruned', 0.0047125704977222516),\n",
       " ('pt', 0.0047125704977222516),\n",
       " ('question', 0.0047125704977222516),\n",
       " ('randomly', 0.0047125704977222516),\n",
       " ('range', 0.0047125704977222516),\n",
       " ('ranking', 0.0047125704977222516),\n",
       " ('records', 0.0047125704977222516),\n",
       " ('regular', 0.0047125704977222516),\n",
       " ('reject', 0.0047125704977222516),\n",
       " ('relative', 0.0047125704977222516),\n",
       " ('removed', 0.0047125704977222516),\n",
       " ('respectively', 0.0047125704977222516),\n",
       " ('restricting', 0.0047125704977222516),\n",
       " ('resultant', 0.0047125704977222516),\n",
       " ('retrieve', 0.0047125704977222516),\n",
       " ('retrieved', 0.0047125704977222516),\n",
       " ('returns', 0.0047125704977222516),\n",
       " ('said', 0.0047125704977222516),\n",
       " ('satisfying', 0.0047125704977222516),\n",
       " ('scope', 0.0047125704977222516),\n",
       " ('sdictwi', 0.0047125704977222516),\n",
       " ('segmentation', 0.0047125704977222516),\n",
       " ('select', 0.0047125704977222516),\n",
       " ('selected', 0.0047125704977222516),\n",
       " ('selective', 0.0047125704977222516),\n",
       " ('simple', 0.0047125704977222516),\n",
       " ('small', 0.0047125704977222516),\n",
       " ('smallerLangDictsMAPBaselineen0', 0.0047125704977222516),\n",
       " ('smallerLangDictsMAPBaselinezh0', 0.0047125704977222516),\n",
       " ('smallerLangWikipedia', 0.0047125704977222516),\n",
       " ('smallerLanguageProportionen', 0.0047125704977222516),\n",
       " ('so', 0.0047125704977222516),\n",
       " ('sophistication', 0.0047125704977222516),\n",
       " ('sourced', 0.0047125704977222516),\n",
       " ('specialised', 0.0047125704977222516),\n",
       " ('subset', 0.0047125704977222516),\n",
       " ('successful', 0.0047125704977222516),\n",
       " ('suggest', 0.0047125704977222516),\n",
       " ('sushi', 0.0047125704977222516),\n",
       " ('suspect', 0.0047125704977222516),\n",
       " ('system', 0.0047125704977222516),\n",
       " ('systematicity', 0.0047125704977222516),\n",
       " ('ten', 0.0047125704977222516),\n",
       " ('tend', 0.0047125704977222516),\n",
       " ('terminology', 0.0047125704977222516),\n",
       " ('text', 0.0047125704977222516),\n",
       " ('them', 0.0047125704977222516),\n",
       " ('three', 0.0047125704977222516),\n",
       " ('threshold', 0.0047125704977222516),\n",
       " ('thus', 0.0047125704977222516),\n",
       " ('together', 0.0047125704977222516),\n",
       " ('transliterated', 0.0047125704977222516),\n",
       " ('typicality', 0.0047125704977222516),\n",
       " ('underlying', 0.0047125704977222516),\n",
       " ('unique', 0.0047125704977222516),\n",
       " ('until', 0.0047125704977222516),\n",
       " ('usable', 0.0047125704977222516),\n",
       " ('value', 0.0047125704977222516),\n",
       " ('values', 0.0047125704977222516),\n",
       " ('varies', 0.0047125704977222516),\n",
       " ('verbs', 0.0047125704977222516),\n",
       " ('vocabulary', 0.0047125704977222516),\n",
       " ('want', 0.0047125704977222516),\n",
       " ('way', 0.0047125704977222516),\n",
       " ('ways', 0.0047125704977222516),\n",
       " ('weighing', 0.0047125704977222516),\n",
       " ('weight', 0.0047125704977222516),\n",
       " ('whereas', 0.0047125704977222516),\n",
       " ('whereby', 0.0047125704977222516),\n",
       " ('while', 0.0047125704977222516),\n",
       " ('wide', 0.0047125704977222516),\n",
       " ('wiotherwisewhere', 0.0047125704977222516),\n",
       " ('within', 0.0047125704977222516),\n",
       " ('wj', 0.0047125704977222516),\n",
       " ('wn', 0.0047125704977222516),\n",
       " ('zero', 0.0047125704977222516),\n",
       " ('zh', 0.0047125704977222516),\n",
       " ('ability', 0.002987279569459114),\n",
       " ('articles', 0.002987279569459114),\n",
       " ('assumption', 0.002987279569459114),\n",
       " ('both', 0.002987279569459114),\n",
       " ('combination', 0.002987279569459114),\n",
       " ('conventional', 0.002987279569459114),\n",
       " ('different', 0.002987279569459114),\n",
       " ('few', 0.002987279569459114),\n",
       " ('highly', 0.002987279569459114),\n",
       " ('into', 0.002987279569459114),\n",
       " ('often', 0.002987279569459114),\n",
       " ('pairs', 0.002987279569459114),\n",
       " ('present', 0.002987279569459114),\n",
       " ('was', 0.002987279569459114),\n",
       " ('an', 0.0020667756515002217),\n",
       " ('containing', 0.0006274751709893084),\n",
       " ('training', 0.0006274751709893084),\n",
       " ('using', 0.0006274751709893084),\n",
       " ('where', 0.0005508130761294888),\n",
       " ('In', 5.85339765396542e-06),\n",
       " ('no', 5.85339765396542e-06),\n",
       " ('open', 5.85339765396542e-06),\n",
       " ('potentially', 5.85339765396542e-06),\n",
       " ('search', 5.85339765396542e-06)]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sorted(InfoF().gi_f(_input), key=lambda x: x[1], reverse=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('work', 14.24359779325141),\n",
       " ('been', 11.070823699703396),\n",
       " ('classification', 11.070823699703396),\n",
       " ('content', 11.070823699703396),\n",
       " ('genre', 11.070823699703396),\n",
       " ('methods', 11.070823699703396),\n",
       " ('wi', 8.865552026052228),\n",
       " ('our', 8.399578991137787),\n",
       " ('there', 8.113331161598126),\n",
       " ('automatically', 7.901282010113391),\n",
       " ('parallel', 7.901282010113391),\n",
       " ('to', 7.107287855365939),\n",
       " ('document', 6.8839429180225125),\n",
       " ('each', 6.537882468353473),\n",
       " ('for', 6.401964857936036),\n",
       " ('research', 5.937904297431032),\n",
       " ('queries', 5.608348670396026),\n",
       " ('query', 5.608348670396026),\n",
       " ('identify', 5.449965189930481),\n",
       " ('proposed', 5.449965189930481),\n",
       " ('relevant', 5.449965189930481),\n",
       " ('on', 5.339193003916989),\n",
       " ('language', 5.0095960760131675),\n",
       " ('While', 4.734964825445786),\n",
       " ('although', 4.734964825445786),\n",
       " ('automatic', 4.734964825445786),\n",
       " ('corpora', 4.734964825445786),\n",
       " ('crosslingual', 4.734964825445786),\n",
       " ('detect', 4.734964825445786),\n",
       " ('detection', 4.734964825445786),\n",
       " ('features', 4.734964825445786),\n",
       " ('generally', 4.734964825445786),\n",
       " ('has', 4.734964825445786),\n",
       " ('mixture', 4.734964825445786),\n",
       " ('specific', 4.734964825445786),\n",
       " ('structural', 4.734964825445786),\n",
       " ('technical', 4.734964825445786),\n",
       " ('thesauri', 4.734964825445786),\n",
       " ('type', 4.734964825445786),\n",
       " ('words', 4.679688973863449),\n",
       " ('as', 4.524137534378951),\n",
       " ('bilingual', 4.2591832720277125),\n",
       " ('en', 4.215686397898935),\n",
       " ('in', 4.104984855243856),\n",
       " ('we', 3.948075098467598),\n",
       " ('from', 3.6780815830570646),\n",
       " ('have', 3.403802124448248),\n",
       " ('Dl', 3.288334761668011),\n",
       " ('collection', 3.288334761668011),\n",
       " ('set', 3.288334761668011),\n",
       " ('the', 3.267047320009624),\n",
       " ('but', 2.964076445436149),\n",
       " ('construct', 2.964076445436149),\n",
       " ('domain', 2.964076445436149),\n",
       " ('its', 2.964076445436149),\n",
       " ('low', 2.964076445436149),\n",
       " ('used', 2.964076445436149),\n",
       " ('based', 2.9365514657024505),\n",
       " ('Ll', 2.8249852907160857),\n",
       " ('returned', 2.8249852907160857),\n",
       " ('than', 2.8249852907160857),\n",
       " ('were', 2.8249852907160857),\n",
       " ('not', 2.7391937450775004),\n",
       " ('The', 2.616318945975763),\n",
       " ('Apriori', 2.361853111239725),\n",
       " ('ClueWeb09', 2.361853111239725),\n",
       " ('For', 2.361853111239725),\n",
       " ('Table', 2.361853111239725),\n",
       " ('dictionaries', 2.261755584472212),\n",
       " ('dataset', 1.9648688819438576),\n",
       " ('MAP', 1.8989380186217204),\n",
       " ('co', 1.8989380186217204),\n",
       " ('first', 1.8989380186217204),\n",
       " ('found', 1.8989380186217204),\n",
       " ('lexical', 1.8989380186217204),\n",
       " ('non', 1.8989380186217204),\n",
       " ('number', 1.8989380186217204),\n",
       " ('only', 1.8989380186217204),\n",
       " ('result', 1.8989380186217204),\n",
       " ('retrieval', 1.8989380186217204),\n",
       " ('target', 1.8989380186217204),\n",
       " ('or', 1.7672344978641377),\n",
       " ('web', 1.7092158631171515),\n",
       " ('This', 1.6983685564855477),\n",
       " ('could', 1.6983685564855477),\n",
       " ('pre', 1.6983685564855477),\n",
       " ('Acknowledgements', 1.5718642797596658),\n",
       " ('Also', 1.5718642797596658),\n",
       " ('Alternatively', 1.5718642797596658),\n",
       " ('Australian', 1.5718642797596658),\n",
       " ('Council', 1.5718642797596658),\n",
       " ('Eight', 1.5718642797596658),\n",
       " ('Finally', 1.5718642797596658),\n",
       " ('Group', 1.5718642797596658),\n",
       " ('Here', 1.5718642797596658),\n",
       " ('Methods', 1.5718642797596658),\n",
       " ('Panlex', 1.5718642797596658),\n",
       " ('Parallel', 1.5718642797596658),\n",
       " ('Related', 1.5718642797596658),\n",
       " ('Research', 1.5718642797596658),\n",
       " ('Such', 1.5718642797596658),\n",
       " ('adapted', 1.5718642797596658),\n",
       " ('anonymous', 1.5718642797596658),\n",
       " ('applied', 1.5718642797596658),\n",
       " ('assistance', 1.5718642797596658),\n",
       " ('attained', 1.5718642797596658),\n",
       " ('aware', 1.5718642797596658),\n",
       " ('bias', 1.5718642797596658),\n",
       " ('broadly', 1.5718642797596658),\n",
       " ('categories', 1.5718642797596658),\n",
       " ('categorisation', 1.5718642797596658),\n",
       " ('classifies', 1.5718642797596658),\n",
       " ('collections', 1.5718642797596658),\n",
       " ('comments', 1.5718642797596658),\n",
       " ('commonly', 1.5718642797596658),\n",
       " ('course', 1.5718642797596658),\n",
       " ('credible', 1.5718642797596658),\n",
       " ('density', 1.5718642797596658),\n",
       " ('described', 1.5718642797596658),\n",
       " ('design', 1.5718642797596658),\n",
       " ('designed', 1.5718642797596658),\n",
       " ('detecting', 1.5718642797596658),\n",
       " ('develop', 1.5718642797596658),\n",
       " ('distributional', 1.5718642797596658),\n",
       " ('downside', 1.5718642797596658),\n",
       " ('exist', 1.5718642797596658),\n",
       " ('experiment', 1.5718642797596658),\n",
       " ('experimental', 1.5718642797596658),\n",
       " ('explore', 1.5718642797596658),\n",
       " ('extract', 1.5718642797596658),\n",
       " ('falls', 1.5718642797596658),\n",
       " ('filter', 1.5718642797596658),\n",
       " ('form', 1.5718642797596658),\n",
       " ('four', 1.5718642797596658),\n",
       " ('franca', 1.5718642797596658),\n",
       " ('funding', 1.5718642797596658),\n",
       " ('future', 1.5718642797596658),\n",
       " ('general', 1.5718642797596658),\n",
       " ('glossed', 1.5718642797596658),\n",
       " ('glossing', 1.5718642797596658),\n",
       " ('guarantee', 1.5718642797596658),\n",
       " ('high', 1.5718642797596658),\n",
       " ('hope', 1.5718642797596658),\n",
       " ('identification', 1.5718642797596658),\n",
       " ('identifying', 1.5718642797596658),\n",
       " ('immediately', 1.5718642797596658),\n",
       " ('initial', 1.5718642797596658),\n",
       " ('judged', 1.5718642797596658),\n",
       " ('label', 1.5718642797596658),\n",
       " ('lines', 1.5718642797596658),\n",
       " ('lingua', 1.5718642797596658),\n",
       " ('lists', 1.5718642797596658),\n",
       " ('locate', 1.5718642797596658),\n",
       " ('mix', 1.5718642797596658),\n",
       " ('multi', 1.5718642797596658),\n",
       " ('namely', 1.5718642797596658),\n",
       " ('novel', 1.5718642797596658),\n",
       " ('obvious', 1.5718642797596658),\n",
       " ('particular', 1.5718642797596658),\n",
       " ('possible', 1.5718642797596658),\n",
       " ('predictable', 1.5718642797596658),\n",
       " ('proceeded', 1.5718642797596658),\n",
       " ('realistically', 1.5718642797596658),\n",
       " ('recall', 1.5718642797596658),\n",
       " ('reviewers', 1.5718642797596658),\n",
       " ('same', 1.5718642797596658),\n",
       " ('seeks', 1.5718642797596658),\n",
       " ('similarity', 1.5718642797596658),\n",
       " ('site', 1.5718642797596658),\n",
       " ('source', 1.5718642797596658),\n",
       " ('specifically', 1.5718642797596658),\n",
       " ('strong', 1.5718642797596658),\n",
       " ('suited', 1.5718642797596658),\n",
       " ('supported', 1.5718642797596658),\n",
       " ('task', 1.5718642797596658),\n",
       " ('thank', 1.5718642797596658),\n",
       " ('theoretically', 1.5718642797596658),\n",
       " ('towards', 1.5718642797596658),\n",
       " ('valuable', 1.5718642797596658),\n",
       " ('wish', 1.5718642797596658),\n",
       " ('would', 1.5718642797596658),\n",
       " ('As', 1.4362398085381756),\n",
       " ('Google', 1.4362398085381756),\n",
       " ('algorithm', 1.4362398085381756),\n",
       " ('approach', 1.4362398085381756),\n",
       " ('at', 1.4362398085381756),\n",
       " ('combinations', 1.4362398085381756),\n",
       " ('if', 1.4362398085381756),\n",
       " ('likelihood', 1.4362398085381756),\n",
       " ('occurrence', 1.4362398085381756),\n",
       " ('scores', 1.4362398085381756),\n",
       " ('support', 1.4362398085381756),\n",
       " ('wj', 1.4362398085381756),\n",
       " ('dictionary', 1.3884589685185347),\n",
       " ('method', 1.32258208498979),\n",
       " ('over', 1.2031343628646027),\n",
       " ('results', 1.1537026476930805),\n",
       " ('with', 1.1537026476930805),\n",
       " ('is', 1.0973195342148756),\n",
       " ('languages', 1.0897947334815399),\n",
       " ('synthetic', 1.0776406690192744),\n",
       " ('word', 1.0776406690192744),\n",
       " ('by', 1.0565674675622176),\n",
       " ('being', 0.9740294552184423),\n",
       " ('more', 0.9740294552184423),\n",
       " ('term', 0.9740294552184423),\n",
       " ('these', 0.9740294552184423),\n",
       " ('770', 0.9737582769557775),\n",
       " ('Japanese', 0.9737582769557775),\n",
       " ('against', 0.9737582769557775),\n",
       " ('any', 0.9737582769557775),\n",
       " ('average', 0.9737582769557775),\n",
       " ('baseline', 0.9737582769557775),\n",
       " ('certain', 0.9737582769557775),\n",
       " ('criterion', 0.9737582769557775),\n",
       " ('de', 0.9737582769557775),\n",
       " ('evaluation', 0.9737582769557775),\n",
       " ('experiments', 0.9737582769557775),\n",
       " ('formulation', 0.9737582769557775),\n",
       " ('fr', 0.9737582769557775),\n",
       " ('higher', 0.9737582769557775),\n",
       " ('including', 0.9737582769557775),\n",
       " ('intuition', 0.9737582769557775),\n",
       " ('item', 0.9737582769557775),\n",
       " ('learned', 0.9737582769557775),\n",
       " ('least', 0.9737582769557775),\n",
       " ('lexicon', 0.9737582769557775),\n",
       " ('occur', 0.9737582769557775),\n",
       " ('one', 0.9737582769557775),\n",
       " ('original', 0.9737582769557775),\n",
       " ('performed', 0.9737582769557775),\n",
       " ('querying', 0.9737582769557775),\n",
       " ('random', 0.9737582769557775),\n",
       " ('sdict', 0.9737582769557775),\n",
       " ('second', 0.9737582769557775),\n",
       " ('size', 0.9737582769557775),\n",
       " ('standard', 0.9737582769557775),\n",
       " ('those', 0.9737582769557775),\n",
       " ('true', 0.9737582769557775),\n",
       " ('two', 0.9737582769557775),\n",
       " ('whether', 0.9737582769557775),\n",
       " ('also', 0.8483041340582531),\n",
       " ('corpus', 0.8483041340582531),\n",
       " ('developers', 0.8483041340582531),\n",
       " ('direct', 0.8483041340582531),\n",
       " ('engine', 0.8483041340582531),\n",
       " ('exhaustively', 0.8483041340582531),\n",
       " ('existing', 0.8483041340582531),\n",
       " ('far', 0.8483041340582531),\n",
       " ('mining', 0.8483041340582531),\n",
       " ('need', 0.8483041340582531),\n",
       " ('patterns', 0.8483041340582531),\n",
       " ('rather', 0.8483041340582531),\n",
       " ('relatively', 0.8483041340582531),\n",
       " ('sets', 0.8483041340582531),\n",
       " ('should', 0.8483041340582531),\n",
       " ('single', 0.8483041340582531),\n",
       " ('style', 0.8483041340582531),\n",
       " ('train', 0.8483041340582531),\n",
       " ('variety', 0.8483041340582531),\n",
       " ('very', 0.8483041340582531),\n",
       " ('well', 0.8483041340582531),\n",
       " ('what', 0.8483041340582531),\n",
       " ('be', 0.7448488650784384),\n",
       " ('which', 0.7448488650784384),\n",
       " ('English', 0.5749745501902908),\n",
       " ('Wikipedia', 0.5749745501902908),\n",
       " ('such', 0.5267920115848028),\n",
       " ('terms', 0.5267920115848028),\n",
       " ('10', 0.5114932201267948),\n",
       " ('100', 0.5114932201267948),\n",
       " ('1000', 0.5114932201267948),\n",
       " ('13', 0.5114932201267948),\n",
       " ('2009', 0.5114932201267948),\n",
       " ('32', 0.5114932201267948),\n",
       " ('49', 0.5114932201267948),\n",
       " ('50', 0.5114932201267948),\n",
       " ('52', 0.5114932201267948),\n",
       " ('62', 0.5114932201267948),\n",
       " ('63', 0.5114932201267948),\n",
       " ('920', 0.5114932201267948),\n",
       " ('API', 0.5114932201267948),\n",
       " ('Arabic', 0.5114932201267948),\n",
       " ('Below', 0.5114932201267948),\n",
       " ('Chinese', 0.5114932201267948),\n",
       " ('Dictionary', 0.5114932201267948),\n",
       " ('Dicts', 0.5114932201267948),\n",
       " ('Figure', 0.5114932201267948),\n",
       " ('Freedict', 0.5114932201267948),\n",
       " ('German', 0.5114932201267948),\n",
       " ('Italian', 0.5114932201267948),\n",
       " ('Korean', 0.5114932201267948),\n",
       " ('Portuguese', 0.5114932201267948),\n",
       " ('Score', 0.5114932201267948),\n",
       " ('Spanish', 0.5114932201267948),\n",
       " ('To', 0.5114932201267948),\n",
       " ('access', 0.5114932201267948),\n",
       " ('ar', 0.5114932201267948),\n",
       " ('assume', 0.5114932201267948),\n",
       " ('background', 0.5114932201267948),\n",
       " ('best', 0.5114932201267948),\n",
       " ('cod', 0.5114932201267948),\n",
       " ('component', 0.5114932201267948),\n",
       " ('contained', 0.5114932201267948),\n",
       " ('contains', 0.5114932201267948),\n",
       " ('coverage', 0.5114932201267948),\n",
       " ('data', 0.5114932201267948),\n",
       " ('detail', 0.5114932201267948),\n",
       " ('determination', 0.5114932201267948),\n",
       " ('discriminating', 0.5114932201267948),\n",
       " ('effectiveness', 0.5114932201267948),\n",
       " ('efficiently', 0.5114932201267948),\n",
       " ('empty', 0.5114932201267948),\n",
       " ('es', 0.5114932201267948),\n",
       " ('evaluate', 0.5114932201267948),\n",
       " ('follows', 0.5114932201267948),\n",
       " ('function', 0.5114932201267948),\n",
       " ('greater', 0.5114932201267948),\n",
       " ('included', 0.5114932201267948),\n",
       " ('index', 0.5114932201267948),\n",
       " ('indexed', 0.5114932201267948),\n",
       " ('indicated', 0.5114932201267948),\n",
       " ('injected', 0.5114932201267948),\n",
       " ('interest', 0.5114932201267948),\n",
       " ('iteration', 0.5114932201267948),\n",
       " ('largely', 0.5114932201267948),\n",
       " ('likely', 0.5114932201267948),\n",
       " ('lower', 0.5114932201267948),\n",
       " ('may', 0.5114932201267948),\n",
       " ('mean', 0.5114932201267948),\n",
       " ('methodology', 0.5114932201267948),\n",
       " ('modified', 0.5114932201267948),\n",
       " ('monolingual', 0.5114932201267948),\n",
       " ('next', 0.5114932201267948),\n",
       " ('others', 0.5114932201267948),\n",
       " ('prefer', 0.5114932201267948),\n",
       " ('proportions', 0.5114932201267948),\n",
       " ('ranked', 0.5114932201267948),\n",
       " ('records', 0.5114932201267948),\n",
       " ('resources', 0.5114932201267948),\n",
       " ('return', 0.5114932201267948),\n",
       " ('score', 0.5114932201267948),\n",
       " ('selection', 0.5114932201267948),\n",
       " ('simply', 0.5114932201267948),\n",
       " ('some', 0.5114932201267948),\n",
       " ('step', 0.5114932201267948),\n",
       " ('substantially', 0.5114932201267948),\n",
       " ('targeted', 0.5114932201267948),\n",
       " ('tend', 0.5114932201267948),\n",
       " ('then', 0.5114932201267948),\n",
       " ('they', 0.5114932201267948),\n",
       " ('third', 0.5114932201267948),\n",
       " ('total', 0.5114932201267948),\n",
       " ('underestimate', 0.5114932201267948),\n",
       " ('unique', 0.5114932201267948),\n",
       " ('use', 0.5114932201267948),\n",
       " ('via', 0.5114932201267948),\n",
       " ('weight', 0.5114932201267948),\n",
       " ('are', 0.4710521580286695),\n",
       " ('multilingual', 0.31735706211929937),\n",
       " ('ability', 0.26302468528001555),\n",
       " ('analysis', 0.26302468528001555),\n",
       " ('articles', 0.26302468528001555),\n",
       " ('assumption', 0.26302468528001555),\n",
       " ('both', 0.26302468528001555),\n",
       " ('can', 0.26302468528001555),\n",
       " ('combination', 0.26302468528001555),\n",
       " ('comparable', 0.26302468528001555),\n",
       " ('conventional', 0.26302468528001555),\n",
       " ('different', 0.26302468528001555),\n",
       " ('few', 0.26302468528001555),\n",
       " ('highly', 0.26302468528001555),\n",
       " ('into', 0.26302468528001555),\n",
       " ('often', 0.26302468528001555),\n",
       " ('pairs', 0.26302468528001555),\n",
       " ('precision', 0.26302468528001555),\n",
       " ('present', 0.26302468528001555),\n",
       " ('was', 0.26302468528001555),\n",
       " ('We', 0.2502196813752562),\n",
       " ('all', 0.2502196813752562),\n",
       " ('it', 0.2502196813752562),\n",
       " ('given', 0.23433378516165249),\n",
       " ('documents', 0.21411036925474036),\n",
       " ('that', 0.19270527636399493),\n",
       " ('an', 0.1875098780051303),\n",
       " ('contain', 0.1875098780051303),\n",
       " ('of', 0.09564835866513022),\n",
       " ('construction', 0.08987968620431275),\n",
       " ('containing', 0.06032206838517595),\n",
       " ('training', 0.06032206838517595),\n",
       " ('using', 0.06032206838517595),\n",
       " ('00', 0.049444434596807696),\n",
       " ('000', 0.049444434596807696),\n",
       " ('00Average', 0.049444434596807696),\n",
       " ('00de0', 0.049444434596807696),\n",
       " ('00es0', 0.049444434596807696),\n",
       " ('00zh0', 0.049444434596807696),\n",
       " ('01ar0', 0.049444434596807696),\n",
       " ('02', 0.049444434596807696),\n",
       " ('03it0', 0.049444434596807696),\n",
       " ('04Table', 0.049444434596807696),\n",
       " ('04de0', 0.049444434596807696),\n",
       " ('04ja0', 0.049444434596807696),\n",
       " ('05', 0.049444434596807696),\n",
       " ('08fr0', 0.049444434596807696),\n",
       " ('08it0', 0.049444434596807696),\n",
       " ('09fr0', 0.049444434596807696),\n",
       " ('0ar0', 0.049444434596807696),\n",
       " ('0it0', 0.049444434596807696),\n",
       " ('11ar0', 0.049444434596807696),\n",
       " ('12Table', 0.049444434596807696),\n",
       " ('13ja0', 0.049444434596807696),\n",
       " ('17', 0.049444434596807696),\n",
       " ('17Average', 0.049444434596807696),\n",
       " ('19es0', 0.049444434596807696),\n",
       " ('20', 0.049444434596807696),\n",
       " ('21', 0.049444434596807696),\n",
       " ('246', 0.049444434596807696),\n",
       " ('2Table', 0.049444434596807696),\n",
       " ('2zh0', 0.049444434596807696),\n",
       " ('320', 0.049444434596807696),\n",
       " ('33', 0.049444434596807696),\n",
       " ('390', 0.049444434596807696),\n",
       " ('3de1', 0.049444434596807696),\n",
       " ('41', 0.049444434596807696),\n",
       " ('47', 0.049444434596807696),\n",
       " ('48', 0.049444434596807696),\n",
       " ('550', 0.049444434596807696),\n",
       " ('61', 0.049444434596807696),\n",
       " ('630', 0.049444434596807696),\n",
       " ('690', 0.049444434596807696),\n",
       " ('6es0', 0.049444434596807696),\n",
       " ('74', 0.049444434596807696),\n",
       " ('750', 0.049444434596807696),\n",
       " ('79', 0.049444434596807696),\n",
       " ('7fr0', 0.049444434596807696),\n",
       " ('840', 0.049444434596807696),\n",
       " ('880', 0.049444434596807696),\n",
       " ('89', 0.049444434596807696),\n",
       " ('92', 0.049444434596807696),\n",
       " ('93', 0.049444434596807696),\n",
       " ('940', 0.049444434596807696),\n",
       " ('950', 0.049444434596807696),\n",
       " ('970', 0.049444434596807696),\n",
       " ('980', 0.049444434596807696),\n",
       " ('9ja0', 0.049444434596807696),\n",
       " ('Among', 0.049444434596807696),\n",
       " ('Based', 0.049444434596807696),\n",
       " ('Boolean', 0.049444434596807696),\n",
       " ('Despite', 0.049444434596807696),\n",
       " ('Details', 0.049444434596807696),\n",
       " ('DictionariesQueries', 0.049444434596807696),\n",
       " ('Each', 0.049444434596807696),\n",
       " ('Encouragingly', 0.049444434596807696),\n",
       " ('Examples', 0.049444434596807696),\n",
       " ('Factors', 0.049444434596807696),\n",
       " ('February', 0.049444434596807696),\n",
       " ('First', 0.049444434596807696),\n",
       " ('French', 0.049444434596807696),\n",
       " ('Given', 0.049444434596807696),\n",
       " ('Having', 0.049444434596807696),\n",
       " ('Indri', 0.049444434596807696),\n",
       " ('January', 0.049444434596807696),\n",
       " ('Language', 0.049444434596807696),\n",
       " ('Lexical', 0.049444434596807696),\n",
       " ('Looking', 0.049444434596807696),\n",
       " ('MeCab', 0.049444434596807696),\n",
       " ('Morphological', 0.049444434596807696),\n",
       " ('Most', 0.049444434596807696),\n",
       " ('Nl', 0.049444434596807696),\n",
       " ('Note', 0.049444434596807696),\n",
       " ('Our', 0.049444434596807696),\n",
       " ('Pwi', 0.049444434596807696),\n",
       " ('P𝑝𝑎𝑝𝑒𝑟', 0.049444434596807696),\n",
       " ('P𝑝𝑎𝑝𝑦𝑟𝑢𝑠', 0.049444434596807696),\n",
       " ('Recall', 0.049444434596807696),\n",
       " ('See', 0.049444434596807696),\n",
       " ('Segmenter', 0.049444434596807696),\n",
       " ('Stanford', 0.049444434596807696),\n",
       " ('Swaheli', 0.049444434596807696),\n",
       " ('Synthetic', 0.049444434596807696),\n",
       " ('That', 0.049444434596807696),\n",
       " ('WikipediaBased', 0.049444434596807696),\n",
       " ('Word', 0.049444434596807696),\n",
       " ('able', 0.049444434596807696),\n",
       " ('absence', 0.049444434596807696),\n",
       " ('accordance', 0.049444434596807696),\n",
       " ('achieved', 0.049444434596807696),\n",
       " ('achieving', 0.049444434596807696),\n",
       " ('actual', 0.049444434596807696),\n",
       " ('adding', 0.049444434596807696),\n",
       " ('affected', 0.049444434596807696),\n",
       " ('almost', 0.049444434596807696),\n",
       " ('anchor', 0.049444434596807696),\n",
       " ('anti', 0.049444434596807696),\n",
       " ('apply', 0.049444434596807696),\n",
       " ('arbitrarily', 0.049444434596807696),\n",
       " ('around', 0.049444434596807696),\n",
       " ('basic', 0.049444434596807696),\n",
       " ('because', 0.049444434596807696),\n",
       " ('below', 0.049444434596807696),\n",
       " ('between', 0.049444434596807696),\n",
       " ('billion', 0.049444434596807696),\n",
       " ('calculate', 0.049444434596807696),\n",
       " ('calculated', 0.049444434596807696),\n",
       " ('calculation', 0.049444434596807696),\n",
       " ('carried', 0.049444434596807696),\n",
       " ('certainly', 0.049444434596807696),\n",
       " ('class', 0.049444434596807696),\n",
       " ('collected', 0.049444434596807696),\n",
       " ('collocate', 0.049444434596807696),\n",
       " ('combined', 0.049444434596807696),\n",
       " ('combining', 0.049444434596807696),\n",
       " ('community', 0.049444434596807696),\n",
       " ('comparably', 0.049444434596807696),\n",
       " ('compiled', 0.049444434596807696),\n",
       " ('complement', 0.049444434596807696),\n",
       " ('considerably', 0.049444434596807696),\n",
       " ('consists', 0.049444434596807696),\n",
       " ('constructed', 0.049444434596807696),\n",
       " ('cooccur', 0.049444434596807696),\n",
       " ('count', 0.049444434596807696),\n",
       " ('creates', 0.049444434596807696),\n",
       " ('cruiser', 0.049444434596807696),\n",
       " ('cscore', 0.049444434596807696),\n",
       " ('cutoff', 0.049444434596807696),\n",
       " ('decreasing', 0.049444434596807696),\n",
       " ('describe', 0.049444434596807696),\n",
       " ('description', 0.049444434596807696),\n",
       " ('detailed', 0.049444434596807696),\n",
       " ('details', 0.049444434596807696),\n",
       " ('developed', 0.049444434596807696),\n",
       " ('developer', 0.049444434596807696),\n",
       " ('differ', 0.049444434596807696),\n",
       " ('discovery', 0.049444434596807696),\n",
       " ('discriminatory', 0.049444434596807696),\n",
       " ('do', 0.049444434596807696),\n",
       " ('documentsNote', 0.049444434596807696),\n",
       " ('downsampled', 0.049444434596807696),\n",
       " ('due', 0.049444434596807696),\n",
       " ('dumps', 0.049444434596807696),\n",
       " ('easily', 0.049444434596807696),\n",
       " ('effective', 0.049444434596807696),\n",
       " ('elements', 0.049444434596807696),\n",
       " ('end', 0.049444434596807696),\n",
       " ('especially', 0.049444434596807696),\n",
       " ('estimated', 0.049444434596807696),\n",
       " ('evaluated', 0.049444434596807696),\n",
       " ('evaluates', 0.049444434596807696),\n",
       " ('evaluating', 0.049444434596807696),\n",
       " ('filtering', 0.049444434596807696),\n",
       " ('final', 0.049444434596807696),\n",
       " ('follow', 0.049444434596807696),\n",
       " ('formed', 0.049444434596807696),\n",
       " ('forms', 0.049444434596807696),\n",
       " ('frequent', 0.049444434596807696),\n",
       " ('generate', 0.049444434596807696),\n",
       " ('generation', 0.049444434596807696),\n",
       " ('genuine', 0.049444434596807696),\n",
       " ('gospel', 0.049444434596807696),\n",
       " ('guaranteed', 0.049444434596807696),\n",
       " ('had', 0.049444434596807696),\n",
       " ('hand', 0.049444434596807696),\n",
       " ('highest', 0.049444434596807696),\n",
       " ('iS', 0.049444434596807696),\n",
       " ('iff', 0.049444434596807696),\n",
       " ('impact', 0.049444434596807696),\n",
       " ('include', 0.049444434596807696),\n",
       " ('incorporating', 0.049444434596807696),\n",
       " ('increasing', 0.049444434596807696),\n",
       " ('indeed', 0.049444434596807696),\n",
       " ('indeterminate', 0.049444434596807696),\n",
       " ('individual', 0.049444434596807696),\n",
       " ('inflected', 0.049444434596807696),\n",
       " ('information', 0.049444434596807696),\n",
       " ('instance', 0.049444434596807696),\n",
       " ('inverted', 0.049444434596807696),\n",
       " ('involving', 0.049444434596807696),\n",
       " ('issue', 0.049444434596807696),\n",
       " ('itemset', 0.049444434596807696),\n",
       " ('ja', 0.049444434596807696),\n",
       " ('knowledge', 0.049444434596807696),\n",
       " ('ko', 0.049444434596807696),\n",
       " ('lNl', 0.049444434596807696),\n",
       " ('learnedAvg', 0.049444434596807696),\n",
       " ('lemmas', 0.049444434596807696),\n",
       " ('length', 0.049444434596807696),\n",
       " ('lengthen3', 0.049444434596807696),\n",
       " ('lexemes', 0.049444434596807696),\n",
       " ('lexicographer', 0.049444434596807696),\n",
       " ('lexicons', 0.049444434596807696),\n",
       " ('line', 0.049444434596807696),\n",
       " ('link', 0.049444434596807696),\n",
       " ('list', 0.049444434596807696),\n",
       " ('little', 0.049444434596807696),\n",
       " ('local', 0.049444434596807696),\n",
       " ('log', 0.049444434596807696),\n",
       " ('longest', 0.049444434596807696),\n",
       " ('manual', 0.049444434596807696),\n",
       " ('mature', 0.049444434596807696),\n",
       " ('maxlPl', 0.049444434596807696),\n",
       " ('million', 0.049444434596807696),\n",
       " ('minus', 0.049444434596807696),\n",
       " ('mono', 0.049444434596807696),\n",
       " ('most', 0.049444434596807696),\n",
       " ('naively', 0.049444434596807696),\n",
       " ('name', 0.049444434596807696),\n",
       " ('needs', 0.049444434596807696),\n",
       " ('new', 0.049444434596807696),\n",
       " ('noodle', 0.049444434596807696),\n",
       " ('noting', 0.049444434596807696),\n",
       " ('nouns', 0.049444434596807696),\n",
       " ('obtained', 0.049444434596807696),\n",
       " ('occurs', 0.049444434596807696),\n",
       " ('optimisation', 0.049444434596807696),\n",
       " ('order', 0.049444434596807696),\n",
       " ('org', 0.049444434596807696),\n",
       " ('otherwise', 0.049444434596807696),\n",
       " ('out', 0.049444434596807696),\n",
       " ('outside', 0.049444434596807696),\n",
       " ('pages', 0.049444434596807696),\n",
       " ('pair', 0.049444434596807696),\n",
       " ('paired', 0.049444434596807696),\n",
       " ('pairing', 0.049444434596807696),\n",
       " ('panlex', 0.049444434596807696),\n",
       " ('paper', 0.049444434596807696),\n",
       " ('perform', 0.049444434596807696),\n",
       " ('predict', 0.049444434596807696),\n",
       " ('predominantly', 0.049444434596807696),\n",
       " ('preferred', 0.049444434596807696),\n",
       " ('presented', 0.049444434596807696),\n",
       " ('prevalence', 0.049444434596807696),\n",
       " ('previous', 0.049444434596807696),\n",
       " ('prior', 0.049444434596807696),\n",
       " ('priori', 0.049444434596807696),\n",
       " ('proportion', 0.049444434596807696),\n",
       " ('pruned', 0.049444434596807696),\n",
       " ('pt', 0.049444434596807696),\n",
       " ('question', 0.049444434596807696),\n",
       " ('randomly', 0.049444434596807696),\n",
       " ('range', 0.049444434596807696),\n",
       " ('ranking', 0.049444434596807696),\n",
       " ('regular', 0.049444434596807696),\n",
       " ('reject', 0.049444434596807696),\n",
       " ('relative', 0.049444434596807696),\n",
       " ('removed', 0.049444434596807696),\n",
       " ('respectively', 0.049444434596807696),\n",
       " ('restricting', 0.049444434596807696),\n",
       " ('resultant', 0.049444434596807696),\n",
       " ('retrieve', 0.049444434596807696),\n",
       " ('retrieved', 0.049444434596807696),\n",
       " ('returns', 0.049444434596807696),\n",
       " ('said', 0.049444434596807696),\n",
       " ('satisfying', 0.049444434596807696),\n",
       " ('scope', 0.049444434596807696),\n",
       " ('sdictwi', 0.049444434596807696),\n",
       " ('segmentation', 0.049444434596807696),\n",
       " ('select', 0.049444434596807696),\n",
       " ('selected', 0.049444434596807696),\n",
       " ('selective', 0.049444434596807696),\n",
       " ('simple', 0.049444434596807696),\n",
       " ('small', 0.049444434596807696),\n",
       " ('smallerLangDictsMAPBaselineen0', 0.049444434596807696),\n",
       " ('smallerLangDictsMAPBaselinezh0', 0.049444434596807696),\n",
       " ('smallerLangWikipedia', 0.049444434596807696),\n",
       " ('smallerLanguageProportionen', 0.049444434596807696),\n",
       " ('so', 0.049444434596807696),\n",
       " ('sophistication', 0.049444434596807696),\n",
       " ('sourced', 0.049444434596807696),\n",
       " ('specialised', 0.049444434596807696),\n",
       " ('subset', 0.049444434596807696),\n",
       " ('successful', 0.049444434596807696),\n",
       " ('suggest', 0.049444434596807696),\n",
       " ('sushi', 0.049444434596807696),\n",
       " ('suspect', 0.049444434596807696),\n",
       " ('system', 0.049444434596807696),\n",
       " ('systematicity', 0.049444434596807696),\n",
       " ('ten', 0.049444434596807696),\n",
       " ('terminology', 0.049444434596807696),\n",
       " ('text', 0.049444434596807696),\n",
       " ('them', 0.049444434596807696),\n",
       " ('three', 0.049444434596807696),\n",
       " ('threshold', 0.049444434596807696),\n",
       " ('thus', 0.049444434596807696),\n",
       " ('together', 0.049444434596807696),\n",
       " ('transliterated', 0.049444434596807696),\n",
       " ('typicality', 0.049444434596807696),\n",
       " ('underlying', 0.049444434596807696),\n",
       " ('until', 0.049444434596807696),\n",
       " ('usable', 0.049444434596807696),\n",
       " ('value', 0.049444434596807696),\n",
       " ('values', 0.049444434596807696),\n",
       " ('varies', 0.049444434596807696),\n",
       " ('verbs', 0.049444434596807696),\n",
       " ('vocabulary', 0.049444434596807696),\n",
       " ('want', 0.049444434596807696),\n",
       " ('way', 0.049444434596807696),\n",
       " ('ways', 0.049444434596807696),\n",
       " ('weighing', 0.049444434596807696),\n",
       " ('whereas', 0.049444434596807696),\n",
       " ('whereby', 0.049444434596807696),\n",
       " ('while', 0.049444434596807696),\n",
       " ('wide', 0.049444434596807696),\n",
       " ('wiotherwisewhere', 0.049444434596807696),\n",
       " ('within', 0.049444434596807696),\n",
       " ('wn', 0.049444434596807696),\n",
       " ('zero', 0.049444434596807696),\n",
       " ('zh', 0.049444434596807696),\n",
       " ('this', 0.047071199831407284),\n",
       " ('their', 0.04486130193572535),\n",
       " ('where', 0.04486130193572535),\n",
       " ('and', 0.0028274513992982975),\n",
       " ('In', 0.0011671155966723745),\n",
       " ('no', 0.0011671155966723745),\n",
       " ('open', 0.0011671155966723745),\n",
       " ('other', 0.0011671155966723745),\n",
       " ('potentially', 0.0011671155966723745),\n",
       " ('search', 0.0011671155966723745)]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rank the (token, score) pairs produced by InfoF().llr_f for this input,\n",
    "# highest score first; ties keep the order in which llr_f returned them.\n",
    "sorted(\n",
    "    InfoF().llr_f(_input),\n",
    "    key=lambda token_score: token_score[1],\n",
    "    reverse=True,\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "sln-summarizer",
   "language": "python",
   "name": "sln-summarizer"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
